[llvm] [WebAssembly] [Backend] Wasm optimize illegal bitmask (PR #145627)

via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 27 11:08:09 PDT 2025


https://github.com/badumbatish updated https://github.com/llvm/llvm-project/pull/145627

>From 9dd2a14e355e8e1fc530ea83dabea647531d895c Mon Sep 17 00:00:00 2001
From: badumbatish <jjasmine at igalia.com>
Date: Mon, 23 Jun 2025 15:24:29 -0700
Subject: [PATCH 1/2] [WebAssembly] Precommit test for optimizing illegal
 bitmask

---
 .../WebAssembly/simd-illegal-bitmask.ll       | 1100 +++++++++++++++++
 1 file changed, 1100 insertions(+)
 create mode 100644 llvm/test/CodeGen/WebAssembly/simd-illegal-bitmask.ll

diff --git a/llvm/test/CodeGen/WebAssembly/simd-illegal-bitmask.ll b/llvm/test/CodeGen/WebAssembly/simd-illegal-bitmask.ll
new file mode 100644
index 0000000000000..1715dc03c0917
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/simd-illegal-bitmask.ll
@@ -0,0 +1,1100 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -O3 -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s
+
+
+target triple = "wasm64"
+
+
+define i16 @legal_bitcast_v16i8(<16 x i8> %x) { ; baseline case: the <16 x i8> input is a single v128 and the mask is expected as one i8x16.bitmask
+; CHECK-LABEL: legal_bitcast_v16i8:
+; CHECK:         .functype legal_bitcast_v16i8 (v128) -> (i32)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    v128.const $push0=, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16
+; CHECK-NEXT:    i8x16.eq $push1=, $0, $pop0
+; CHECK-NEXT:    i8x16.bitmask $push2=, $pop1
+; CHECK-NEXT:    return $pop2
+    %z = icmp eq <16 x i8> %x, splat (i8 16) ; lane-wise equality against 16, yielding <16 x i1>
+    %res = bitcast <16 x i1> %z to i16 ; reinterpret the 16 lane bits as one i16 mask
+    ret i16 %res
+}
+
+define i32 @optimize_illegal_bitcast_v32i8(<32 x i8> %x) { ; <32 x i8> is passed as two v128 params; the expected output below builds the mask one lane at a time (extract_lane_u/and/shl/or)
+; CHECK-LABEL: optimize_illegal_bitcast_v32i8:
+; CHECK:         .functype optimize_illegal_bitcast_v32i8 (v128, v128) -> (i32)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    global.get $push157=, __stack_pointer
+; CHECK-NEXT:    i64.const $push158=, 16
+; CHECK-NEXT:    i64.sub $drop=, $pop157, $pop158
+; CHECK-NEXT:    v128.const $push194=, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32
+; CHECK-NEXT:    local.tee $push193=, $2=, $pop194
+; CHECK-NEXT:    i8x16.eq $push192=, $0, $pop193
+; CHECK-NEXT:    local.tee $push191=, $0=, $pop192
+; CHECK-NEXT:    i8x16.extract_lane_u $push79=, $pop191, 0
+; CHECK-NEXT:    i32.const $push1=, 1
+; CHECK-NEXT:    i32.and $push80=, $pop79, $pop1
+; CHECK-NEXT:    i8x16.extract_lane_u $push81=, $0, 1
+; CHECK-NEXT:    i32.const $push190=, 1
+; CHECK-NEXT:    i32.and $push82=, $pop81, $pop190
+; CHECK-NEXT:    i32.const $push189=, 1
+; CHECK-NEXT:    i32.shl $push83=, $pop82, $pop189
+; CHECK-NEXT:    i32.or $push84=, $pop80, $pop83
+; CHECK-NEXT:    i8x16.extract_lane_u $push85=, $0, 2
+; CHECK-NEXT:    i32.const $push188=, 1
+; CHECK-NEXT:    i32.and $push86=, $pop85, $pop188
+; CHECK-NEXT:    i32.const $push87=, 2
+; CHECK-NEXT:    i32.shl $push88=, $pop86, $pop87
+; CHECK-NEXT:    i32.or $push89=, $pop84, $pop88
+; CHECK-NEXT:    i8x16.extract_lane_u $push90=, $0, 3
+; CHECK-NEXT:    i32.const $push187=, 1
+; CHECK-NEXT:    i32.and $push91=, $pop90, $pop187
+; CHECK-NEXT:    i32.const $push92=, 3
+; CHECK-NEXT:    i32.shl $push93=, $pop91, $pop92
+; CHECK-NEXT:    i32.or $push94=, $pop89, $pop93
+; CHECK-NEXT:    i8x16.extract_lane_u $push95=, $0, 4
+; CHECK-NEXT:    i32.const $push186=, 1
+; CHECK-NEXT:    i32.and $push96=, $pop95, $pop186
+; CHECK-NEXT:    i32.const $push97=, 4
+; CHECK-NEXT:    i32.shl $push98=, $pop96, $pop97
+; CHECK-NEXT:    i32.or $push99=, $pop94, $pop98
+; CHECK-NEXT:    i8x16.extract_lane_u $push100=, $0, 5
+; CHECK-NEXT:    i32.const $push185=, 1
+; CHECK-NEXT:    i32.and $push101=, $pop100, $pop185
+; CHECK-NEXT:    i32.const $push102=, 5
+; CHECK-NEXT:    i32.shl $push103=, $pop101, $pop102
+; CHECK-NEXT:    i32.or $push104=, $pop99, $pop103
+; CHECK-NEXT:    i8x16.extract_lane_u $push105=, $0, 6
+; CHECK-NEXT:    i32.const $push184=, 1
+; CHECK-NEXT:    i32.and $push106=, $pop105, $pop184
+; CHECK-NEXT:    i32.const $push107=, 6
+; CHECK-NEXT:    i32.shl $push108=, $pop106, $pop107
+; CHECK-NEXT:    i32.or $push109=, $pop104, $pop108
+; CHECK-NEXT:    i8x16.extract_lane_u $push110=, $0, 7
+; CHECK-NEXT:    i32.const $push183=, 1
+; CHECK-NEXT:    i32.and $push111=, $pop110, $pop183
+; CHECK-NEXT:    i32.const $push112=, 7
+; CHECK-NEXT:    i32.shl $push113=, $pop111, $pop112
+; CHECK-NEXT:    i32.or $push114=, $pop109, $pop113
+; CHECK-NEXT:    i8x16.extract_lane_u $push115=, $0, 8
+; CHECK-NEXT:    i32.const $push182=, 1
+; CHECK-NEXT:    i32.and $push116=, $pop115, $pop182
+; CHECK-NEXT:    i32.const $push117=, 8
+; CHECK-NEXT:    i32.shl $push118=, $pop116, $pop117
+; CHECK-NEXT:    i32.or $push119=, $pop114, $pop118
+; CHECK-NEXT:    i8x16.extract_lane_u $push120=, $0, 9
+; CHECK-NEXT:    i32.const $push181=, 1
+; CHECK-NEXT:    i32.and $push121=, $pop120, $pop181
+; CHECK-NEXT:    i32.const $push122=, 9
+; CHECK-NEXT:    i32.shl $push123=, $pop121, $pop122
+; CHECK-NEXT:    i32.or $push124=, $pop119, $pop123
+; CHECK-NEXT:    i8x16.extract_lane_u $push125=, $0, 10
+; CHECK-NEXT:    i32.const $push180=, 1
+; CHECK-NEXT:    i32.and $push126=, $pop125, $pop180
+; CHECK-NEXT:    i32.const $push127=, 10
+; CHECK-NEXT:    i32.shl $push128=, $pop126, $pop127
+; CHECK-NEXT:    i32.or $push129=, $pop124, $pop128
+; CHECK-NEXT:    i8x16.extract_lane_u $push130=, $0, 11
+; CHECK-NEXT:    i32.const $push179=, 1
+; CHECK-NEXT:    i32.and $push131=, $pop130, $pop179
+; CHECK-NEXT:    i32.const $push132=, 11
+; CHECK-NEXT:    i32.shl $push133=, $pop131, $pop132
+; CHECK-NEXT:    i32.or $push134=, $pop129, $pop133
+; CHECK-NEXT:    i8x16.extract_lane_u $push135=, $0, 12
+; CHECK-NEXT:    i32.const $push178=, 1
+; CHECK-NEXT:    i32.and $push136=, $pop135, $pop178
+; CHECK-NEXT:    i32.const $push137=, 12
+; CHECK-NEXT:    i32.shl $push138=, $pop136, $pop137
+; CHECK-NEXT:    i32.or $push139=, $pop134, $pop138
+; CHECK-NEXT:    i8x16.extract_lane_u $push140=, $0, 13
+; CHECK-NEXT:    i32.const $push177=, 1
+; CHECK-NEXT:    i32.and $push141=, $pop140, $pop177
+; CHECK-NEXT:    i32.const $push142=, 13
+; CHECK-NEXT:    i32.shl $push143=, $pop141, $pop142
+; CHECK-NEXT:    i32.or $push144=, $pop139, $pop143
+; CHECK-NEXT:    i8x16.extract_lane_u $push145=, $0, 14
+; CHECK-NEXT:    i32.const $push176=, 1
+; CHECK-NEXT:    i32.and $push146=, $pop145, $pop176
+; CHECK-NEXT:    i32.const $push147=, 14
+; CHECK-NEXT:    i32.shl $push148=, $pop146, $pop147
+; CHECK-NEXT:    i32.or $push149=, $pop144, $pop148
+; CHECK-NEXT:    i8x16.extract_lane_u $push150=, $0, 15
+; CHECK-NEXT:    i32.const $push151=, 15
+; CHECK-NEXT:    i32.shl $push152=, $pop150, $pop151
+; CHECK-NEXT:    i32.or $push153=, $pop149, $pop152
+; CHECK-NEXT:    i32.const $push154=, 65535
+; CHECK-NEXT:    i32.and $push155=, $pop153, $pop154
+; CHECK-NEXT:    i8x16.eq $push175=, $1, $2
+; CHECK-NEXT:    local.tee $push174=, $0=, $pop175
+; CHECK-NEXT:    i8x16.extract_lane_u $push75=, $pop174, 15
+; CHECK-NEXT:    i32.const $push76=, 31
+; CHECK-NEXT:    i32.shl $push77=, $pop75, $pop76
+; CHECK-NEXT:    i8x16.extract_lane_u $push70=, $0, 14
+; CHECK-NEXT:    i32.const $push173=, 1
+; CHECK-NEXT:    i32.and $push71=, $pop70, $pop173
+; CHECK-NEXT:    i32.const $push72=, 30
+; CHECK-NEXT:    i32.shl $push73=, $pop71, $pop72
+; CHECK-NEXT:    i8x16.extract_lane_u $push65=, $0, 13
+; CHECK-NEXT:    i32.const $push172=, 1
+; CHECK-NEXT:    i32.and $push66=, $pop65, $pop172
+; CHECK-NEXT:    i32.const $push67=, 29
+; CHECK-NEXT:    i32.shl $push68=, $pop66, $pop67
+; CHECK-NEXT:    i8x16.extract_lane_u $push60=, $0, 12
+; CHECK-NEXT:    i32.const $push171=, 1
+; CHECK-NEXT:    i32.and $push61=, $pop60, $pop171
+; CHECK-NEXT:    i32.const $push62=, 28
+; CHECK-NEXT:    i32.shl $push63=, $pop61, $pop62
+; CHECK-NEXT:    i8x16.extract_lane_u $push55=, $0, 11
+; CHECK-NEXT:    i32.const $push170=, 1
+; CHECK-NEXT:    i32.and $push56=, $pop55, $pop170
+; CHECK-NEXT:    i32.const $push57=, 27
+; CHECK-NEXT:    i32.shl $push58=, $pop56, $pop57
+; CHECK-NEXT:    i8x16.extract_lane_u $push50=, $0, 10
+; CHECK-NEXT:    i32.const $push169=, 1
+; CHECK-NEXT:    i32.and $push51=, $pop50, $pop169
+; CHECK-NEXT:    i32.const $push52=, 26
+; CHECK-NEXT:    i32.shl $push53=, $pop51, $pop52
+; CHECK-NEXT:    i8x16.extract_lane_u $push45=, $0, 9
+; CHECK-NEXT:    i32.const $push168=, 1
+; CHECK-NEXT:    i32.and $push46=, $pop45, $pop168
+; CHECK-NEXT:    i32.const $push47=, 25
+; CHECK-NEXT:    i32.shl $push48=, $pop46, $pop47
+; CHECK-NEXT:    i8x16.extract_lane_u $push40=, $0, 8
+; CHECK-NEXT:    i32.const $push167=, 1
+; CHECK-NEXT:    i32.and $push41=, $pop40, $pop167
+; CHECK-NEXT:    i32.const $push42=, 24
+; CHECK-NEXT:    i32.shl $push43=, $pop41, $pop42
+; CHECK-NEXT:    i8x16.extract_lane_u $push35=, $0, 7
+; CHECK-NEXT:    i32.const $push166=, 1
+; CHECK-NEXT:    i32.and $push36=, $pop35, $pop166
+; CHECK-NEXT:    i32.const $push37=, 23
+; CHECK-NEXT:    i32.shl $push38=, $pop36, $pop37
+; CHECK-NEXT:    i8x16.extract_lane_u $push30=, $0, 6
+; CHECK-NEXT:    i32.const $push165=, 1
+; CHECK-NEXT:    i32.and $push31=, $pop30, $pop165
+; CHECK-NEXT:    i32.const $push32=, 22
+; CHECK-NEXT:    i32.shl $push33=, $pop31, $pop32
+; CHECK-NEXT:    i8x16.extract_lane_u $push25=, $0, 5
+; CHECK-NEXT:    i32.const $push164=, 1
+; CHECK-NEXT:    i32.and $push26=, $pop25, $pop164
+; CHECK-NEXT:    i32.const $push27=, 21
+; CHECK-NEXT:    i32.shl $push28=, $pop26, $pop27
+; CHECK-NEXT:    i8x16.extract_lane_u $push20=, $0, 4
+; CHECK-NEXT:    i32.const $push163=, 1
+; CHECK-NEXT:    i32.and $push21=, $pop20, $pop163
+; CHECK-NEXT:    i32.const $push22=, 20
+; CHECK-NEXT:    i32.shl $push23=, $pop21, $pop22
+; CHECK-NEXT:    i8x16.extract_lane_u $push15=, $0, 3
+; CHECK-NEXT:    i32.const $push162=, 1
+; CHECK-NEXT:    i32.and $push16=, $pop15, $pop162
+; CHECK-NEXT:    i32.const $push17=, 19
+; CHECK-NEXT:    i32.shl $push18=, $pop16, $pop17
+; CHECK-NEXT:    i8x16.extract_lane_u $push10=, $0, 2
+; CHECK-NEXT:    i32.const $push161=, 1
+; CHECK-NEXT:    i32.and $push11=, $pop10, $pop161
+; CHECK-NEXT:    i32.const $push12=, 18
+; CHECK-NEXT:    i32.shl $push13=, $pop11, $pop12
+; CHECK-NEXT:    i8x16.extract_lane_u $push5=, $0, 1
+; CHECK-NEXT:    i32.const $push160=, 1
+; CHECK-NEXT:    i32.and $push6=, $pop5, $pop160
+; CHECK-NEXT:    i32.const $push7=, 17
+; CHECK-NEXT:    i32.shl $push8=, $pop6, $pop7
+; CHECK-NEXT:    i8x16.extract_lane_u $push0=, $0, 0
+; CHECK-NEXT:    i32.const $push159=, 1
+; CHECK-NEXT:    i32.and $push2=, $pop0, $pop159
+; CHECK-NEXT:    i32.const $push3=, 16
+; CHECK-NEXT:    i32.shl $push4=, $pop2, $pop3
+; CHECK-NEXT:    i32.or $push9=, $pop8, $pop4
+; CHECK-NEXT:    i32.or $push14=, $pop13, $pop9
+; CHECK-NEXT:    i32.or $push19=, $pop18, $pop14
+; CHECK-NEXT:    i32.or $push24=, $pop23, $pop19
+; CHECK-NEXT:    i32.or $push29=, $pop28, $pop24
+; CHECK-NEXT:    i32.or $push34=, $pop33, $pop29
+; CHECK-NEXT:    i32.or $push39=, $pop38, $pop34
+; CHECK-NEXT:    i32.or $push44=, $pop43, $pop39
+; CHECK-NEXT:    i32.or $push49=, $pop48, $pop44
+; CHECK-NEXT:    i32.or $push54=, $pop53, $pop49
+; CHECK-NEXT:    i32.or $push59=, $pop58, $pop54
+; CHECK-NEXT:    i32.or $push64=, $pop63, $pop59
+; CHECK-NEXT:    i32.or $push69=, $pop68, $pop64
+; CHECK-NEXT:    i32.or $push74=, $pop73, $pop69
+; CHECK-NEXT:    i32.or $push78=, $pop77, $pop74
+; CHECK-NEXT:    i32.or $push156=, $pop155, $pop78
+; CHECK-NEXT:    return $pop156
+    %z = icmp eq <32 x i8> %x, splat (i8 32) ; lane-wise equality against 32, yielding <32 x i1>
+    %res = bitcast <32 x i1> %z to i32 ; reinterpret the 32 lane bits as one i32 mask
+    ret i32 %res
+}
+
+
+define i64 @optimize_illegal_bitcast_v64i8(<64 x i8> %x) { ; <64 x i8> is passed as four v128 params; the expected output below builds two 32-bit masks lane by lane and joins them with i64.shl/i64.or
+; CHECK-LABEL: optimize_illegal_bitcast_v64i8:
+; CHECK:         .functype optimize_illegal_bitcast_v64i8 (v128, v128, v128, v128) -> (i64)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    global.get $push287=, __stack_pointer
+; CHECK-NEXT:    i64.const $push288=, 16
+; CHECK-NEXT:    i64.sub $drop=, $pop287, $pop288
+; CHECK-NEXT:    v128.const $push390=, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64
+; CHECK-NEXT:    local.tee $push389=, $4=, $pop390
+; CHECK-NEXT:    i8x16.eq $push388=, $0, $pop389
+; CHECK-NEXT:    local.tee $push387=, $0=, $pop388
+; CHECK-NEXT:    i8x16.extract_lane_u $push222=, $pop387, 0
+; CHECK-NEXT:    i32.const $push1=, 1
+; CHECK-NEXT:    i32.and $push223=, $pop222, $pop1
+; CHECK-NEXT:    i8x16.extract_lane_u $push224=, $0, 1
+; CHECK-NEXT:    i32.const $push386=, 1
+; CHECK-NEXT:    i32.and $push225=, $pop224, $pop386
+; CHECK-NEXT:    i32.const $push385=, 1
+; CHECK-NEXT:    i32.shl $push226=, $pop225, $pop385
+; CHECK-NEXT:    i32.or $push227=, $pop223, $pop226
+; CHECK-NEXT:    i8x16.extract_lane_u $push228=, $0, 2
+; CHECK-NEXT:    i32.const $push384=, 1
+; CHECK-NEXT:    i32.and $push229=, $pop228, $pop384
+; CHECK-NEXT:    i32.const $push87=, 2
+; CHECK-NEXT:    i32.shl $push230=, $pop229, $pop87
+; CHECK-NEXT:    i32.or $push231=, $pop227, $pop230
+; CHECK-NEXT:    i8x16.extract_lane_u $push232=, $0, 3
+; CHECK-NEXT:    i32.const $push383=, 1
+; CHECK-NEXT:    i32.and $push233=, $pop232, $pop383
+; CHECK-NEXT:    i32.const $push92=, 3
+; CHECK-NEXT:    i32.shl $push234=, $pop233, $pop92
+; CHECK-NEXT:    i32.or $push235=, $pop231, $pop234
+; CHECK-NEXT:    i8x16.extract_lane_u $push236=, $0, 4
+; CHECK-NEXT:    i32.const $push382=, 1
+; CHECK-NEXT:    i32.and $push237=, $pop236, $pop382
+; CHECK-NEXT:    i32.const $push97=, 4
+; CHECK-NEXT:    i32.shl $push238=, $pop237, $pop97
+; CHECK-NEXT:    i32.or $push239=, $pop235, $pop238
+; CHECK-NEXT:    i8x16.extract_lane_u $push240=, $0, 5
+; CHECK-NEXT:    i32.const $push381=, 1
+; CHECK-NEXT:    i32.and $push241=, $pop240, $pop381
+; CHECK-NEXT:    i32.const $push102=, 5
+; CHECK-NEXT:    i32.shl $push242=, $pop241, $pop102
+; CHECK-NEXT:    i32.or $push243=, $pop239, $pop242
+; CHECK-NEXT:    i8x16.extract_lane_u $push244=, $0, 6
+; CHECK-NEXT:    i32.const $push380=, 1
+; CHECK-NEXT:    i32.and $push245=, $pop244, $pop380
+; CHECK-NEXT:    i32.const $push107=, 6
+; CHECK-NEXT:    i32.shl $push246=, $pop245, $pop107
+; CHECK-NEXT:    i32.or $push247=, $pop243, $pop246
+; CHECK-NEXT:    i8x16.extract_lane_u $push248=, $0, 7
+; CHECK-NEXT:    i32.const $push379=, 1
+; CHECK-NEXT:    i32.and $push249=, $pop248, $pop379
+; CHECK-NEXT:    i32.const $push112=, 7
+; CHECK-NEXT:    i32.shl $push250=, $pop249, $pop112
+; CHECK-NEXT:    i32.or $push251=, $pop247, $pop250
+; CHECK-NEXT:    i8x16.extract_lane_u $push252=, $0, 8
+; CHECK-NEXT:    i32.const $push378=, 1
+; CHECK-NEXT:    i32.and $push253=, $pop252, $pop378
+; CHECK-NEXT:    i32.const $push117=, 8
+; CHECK-NEXT:    i32.shl $push254=, $pop253, $pop117
+; CHECK-NEXT:    i32.or $push255=, $pop251, $pop254
+; CHECK-NEXT:    i8x16.extract_lane_u $push256=, $0, 9
+; CHECK-NEXT:    i32.const $push377=, 1
+; CHECK-NEXT:    i32.and $push257=, $pop256, $pop377
+; CHECK-NEXT:    i32.const $push122=, 9
+; CHECK-NEXT:    i32.shl $push258=, $pop257, $pop122
+; CHECK-NEXT:    i32.or $push259=, $pop255, $pop258
+; CHECK-NEXT:    i8x16.extract_lane_u $push260=, $0, 10
+; CHECK-NEXT:    i32.const $push376=, 1
+; CHECK-NEXT:    i32.and $push261=, $pop260, $pop376
+; CHECK-NEXT:    i32.const $push127=, 10
+; CHECK-NEXT:    i32.shl $push262=, $pop261, $pop127
+; CHECK-NEXT:    i32.or $push263=, $pop259, $pop262
+; CHECK-NEXT:    i8x16.extract_lane_u $push264=, $0, 11
+; CHECK-NEXT:    i32.const $push375=, 1
+; CHECK-NEXT:    i32.and $push265=, $pop264, $pop375
+; CHECK-NEXT:    i32.const $push132=, 11
+; CHECK-NEXT:    i32.shl $push266=, $pop265, $pop132
+; CHECK-NEXT:    i32.or $push267=, $pop263, $pop266
+; CHECK-NEXT:    i8x16.extract_lane_u $push268=, $0, 12
+; CHECK-NEXT:    i32.const $push374=, 1
+; CHECK-NEXT:    i32.and $push269=, $pop268, $pop374
+; CHECK-NEXT:    i32.const $push137=, 12
+; CHECK-NEXT:    i32.shl $push270=, $pop269, $pop137
+; CHECK-NEXT:    i32.or $push271=, $pop267, $pop270
+; CHECK-NEXT:    i8x16.extract_lane_u $push272=, $0, 13
+; CHECK-NEXT:    i32.const $push373=, 1
+; CHECK-NEXT:    i32.and $push273=, $pop272, $pop373
+; CHECK-NEXT:    i32.const $push142=, 13
+; CHECK-NEXT:    i32.shl $push274=, $pop273, $pop142
+; CHECK-NEXT:    i32.or $push275=, $pop271, $pop274
+; CHECK-NEXT:    i8x16.extract_lane_u $push276=, $0, 14
+; CHECK-NEXT:    i32.const $push372=, 1
+; CHECK-NEXT:    i32.and $push277=, $pop276, $pop372
+; CHECK-NEXT:    i32.const $push147=, 14
+; CHECK-NEXT:    i32.shl $push278=, $pop277, $pop147
+; CHECK-NEXT:    i32.or $push279=, $pop275, $pop278
+; CHECK-NEXT:    i8x16.extract_lane_u $push280=, $0, 15
+; CHECK-NEXT:    i32.const $push151=, 15
+; CHECK-NEXT:    i32.shl $push281=, $pop280, $pop151
+; CHECK-NEXT:    i32.or $push282=, $pop279, $pop281
+; CHECK-NEXT:    i32.const $push154=, 65535
+; CHECK-NEXT:    i32.and $push283=, $pop282, $pop154
+; CHECK-NEXT:    i8x16.eq $push371=, $1, $4
+; CHECK-NEXT:    local.tee $push370=, $0=, $pop371
+; CHECK-NEXT:    i8x16.extract_lane_u $push219=, $pop370, 15
+; CHECK-NEXT:    i32.const $push76=, 31
+; CHECK-NEXT:    i32.shl $push220=, $pop219, $pop76
+; CHECK-NEXT:    i8x16.extract_lane_u $push215=, $0, 14
+; CHECK-NEXT:    i32.const $push369=, 1
+; CHECK-NEXT:    i32.and $push216=, $pop215, $pop369
+; CHECK-NEXT:    i32.const $push72=, 30
+; CHECK-NEXT:    i32.shl $push217=, $pop216, $pop72
+; CHECK-NEXT:    i8x16.extract_lane_u $push211=, $0, 13
+; CHECK-NEXT:    i32.const $push368=, 1
+; CHECK-NEXT:    i32.and $push212=, $pop211, $pop368
+; CHECK-NEXT:    i32.const $push67=, 29
+; CHECK-NEXT:    i32.shl $push213=, $pop212, $pop67
+; CHECK-NEXT:    i8x16.extract_lane_u $push207=, $0, 12
+; CHECK-NEXT:    i32.const $push367=, 1
+; CHECK-NEXT:    i32.and $push208=, $pop207, $pop367
+; CHECK-NEXT:    i32.const $push62=, 28
+; CHECK-NEXT:    i32.shl $push209=, $pop208, $pop62
+; CHECK-NEXT:    i8x16.extract_lane_u $push203=, $0, 11
+; CHECK-NEXT:    i32.const $push366=, 1
+; CHECK-NEXT:    i32.and $push204=, $pop203, $pop366
+; CHECK-NEXT:    i32.const $push57=, 27
+; CHECK-NEXT:    i32.shl $push205=, $pop204, $pop57
+; CHECK-NEXT:    i8x16.extract_lane_u $push199=, $0, 10
+; CHECK-NEXT:    i32.const $push365=, 1
+; CHECK-NEXT:    i32.and $push200=, $pop199, $pop365
+; CHECK-NEXT:    i32.const $push52=, 26
+; CHECK-NEXT:    i32.shl $push201=, $pop200, $pop52
+; CHECK-NEXT:    i8x16.extract_lane_u $push195=, $0, 9
+; CHECK-NEXT:    i32.const $push364=, 1
+; CHECK-NEXT:    i32.and $push196=, $pop195, $pop364
+; CHECK-NEXT:    i32.const $push47=, 25
+; CHECK-NEXT:    i32.shl $push197=, $pop196, $pop47
+; CHECK-NEXT:    i8x16.extract_lane_u $push191=, $0, 8
+; CHECK-NEXT:    i32.const $push363=, 1
+; CHECK-NEXT:    i32.and $push192=, $pop191, $pop363
+; CHECK-NEXT:    i32.const $push42=, 24
+; CHECK-NEXT:    i32.shl $push193=, $pop192, $pop42
+; CHECK-NEXT:    i8x16.extract_lane_u $push187=, $0, 7
+; CHECK-NEXT:    i32.const $push362=, 1
+; CHECK-NEXT:    i32.and $push188=, $pop187, $pop362
+; CHECK-NEXT:    i32.const $push37=, 23
+; CHECK-NEXT:    i32.shl $push189=, $pop188, $pop37
+; CHECK-NEXT:    i8x16.extract_lane_u $push183=, $0, 6
+; CHECK-NEXT:    i32.const $push361=, 1
+; CHECK-NEXT:    i32.and $push184=, $pop183, $pop361
+; CHECK-NEXT:    i32.const $push32=, 22
+; CHECK-NEXT:    i32.shl $push185=, $pop184, $pop32
+; CHECK-NEXT:    i8x16.extract_lane_u $push179=, $0, 5
+; CHECK-NEXT:    i32.const $push360=, 1
+; CHECK-NEXT:    i32.and $push180=, $pop179, $pop360
+; CHECK-NEXT:    i32.const $push27=, 21
+; CHECK-NEXT:    i32.shl $push181=, $pop180, $pop27
+; CHECK-NEXT:    i8x16.extract_lane_u $push175=, $0, 4
+; CHECK-NEXT:    i32.const $push359=, 1
+; CHECK-NEXT:    i32.and $push176=, $pop175, $pop359
+; CHECK-NEXT:    i32.const $push22=, 20
+; CHECK-NEXT:    i32.shl $push177=, $pop176, $pop22
+; CHECK-NEXT:    i8x16.extract_lane_u $push171=, $0, 3
+; CHECK-NEXT:    i32.const $push358=, 1
+; CHECK-NEXT:    i32.and $push172=, $pop171, $pop358
+; CHECK-NEXT:    i32.const $push17=, 19
+; CHECK-NEXT:    i32.shl $push173=, $pop172, $pop17
+; CHECK-NEXT:    i8x16.extract_lane_u $push167=, $0, 2
+; CHECK-NEXT:    i32.const $push357=, 1
+; CHECK-NEXT:    i32.and $push168=, $pop167, $pop357
+; CHECK-NEXT:    i32.const $push12=, 18
+; CHECK-NEXT:    i32.shl $push169=, $pop168, $pop12
+; CHECK-NEXT:    i8x16.extract_lane_u $push163=, $0, 1
+; CHECK-NEXT:    i32.const $push356=, 1
+; CHECK-NEXT:    i32.and $push164=, $pop163, $pop356
+; CHECK-NEXT:    i32.const $push7=, 17
+; CHECK-NEXT:    i32.shl $push165=, $pop164, $pop7
+; CHECK-NEXT:    i8x16.extract_lane_u $push160=, $0, 0
+; CHECK-NEXT:    i32.const $push355=, 1
+; CHECK-NEXT:    i32.and $push161=, $pop160, $pop355
+; CHECK-NEXT:    i32.const $push3=, 16
+; CHECK-NEXT:    i32.shl $push162=, $pop161, $pop3
+; CHECK-NEXT:    i32.or $push166=, $pop165, $pop162
+; CHECK-NEXT:    i32.or $push170=, $pop169, $pop166
+; CHECK-NEXT:    i32.or $push174=, $pop173, $pop170
+; CHECK-NEXT:    i32.or $push178=, $pop177, $pop174
+; CHECK-NEXT:    i32.or $push182=, $pop181, $pop178
+; CHECK-NEXT:    i32.or $push186=, $pop185, $pop182
+; CHECK-NEXT:    i32.or $push190=, $pop189, $pop186
+; CHECK-NEXT:    i32.or $push194=, $pop193, $pop190
+; CHECK-NEXT:    i32.or $push198=, $pop197, $pop194
+; CHECK-NEXT:    i32.or $push202=, $pop201, $pop198
+; CHECK-NEXT:    i32.or $push206=, $pop205, $pop202
+; CHECK-NEXT:    i32.or $push210=, $pop209, $pop206
+; CHECK-NEXT:    i32.or $push214=, $pop213, $pop210
+; CHECK-NEXT:    i32.or $push218=, $pop217, $pop214
+; CHECK-NEXT:    i32.or $push221=, $pop220, $pop218
+; CHECK-NEXT:    i32.or $push284=, $pop283, $pop221
+; CHECK-NEXT:    i64.extend_i32_u $push285=, $pop284
+; CHECK-NEXT:    i8x16.eq $push354=, $2, $4
+; CHECK-NEXT:    local.tee $push353=, $0=, $pop354
+; CHECK-NEXT:    i8x16.extract_lane_u $push79=, $pop353, 0
+; CHECK-NEXT:    i32.const $push352=, 1
+; CHECK-NEXT:    i32.and $push80=, $pop79, $pop352
+; CHECK-NEXT:    i8x16.extract_lane_u $push81=, $0, 1
+; CHECK-NEXT:    i32.const $push351=, 1
+; CHECK-NEXT:    i32.and $push82=, $pop81, $pop351
+; CHECK-NEXT:    i32.const $push350=, 1
+; CHECK-NEXT:    i32.shl $push83=, $pop82, $pop350
+; CHECK-NEXT:    i32.or $push84=, $pop80, $pop83
+; CHECK-NEXT:    i8x16.extract_lane_u $push85=, $0, 2
+; CHECK-NEXT:    i32.const $push349=, 1
+; CHECK-NEXT:    i32.and $push86=, $pop85, $pop349
+; CHECK-NEXT:    i32.const $push348=, 2
+; CHECK-NEXT:    i32.shl $push88=, $pop86, $pop348
+; CHECK-NEXT:    i32.or $push89=, $pop84, $pop88
+; CHECK-NEXT:    i8x16.extract_lane_u $push90=, $0, 3
+; CHECK-NEXT:    i32.const $push347=, 1
+; CHECK-NEXT:    i32.and $push91=, $pop90, $pop347
+; CHECK-NEXT:    i32.const $push346=, 3
+; CHECK-NEXT:    i32.shl $push93=, $pop91, $pop346
+; CHECK-NEXT:    i32.or $push94=, $pop89, $pop93
+; CHECK-NEXT:    i8x16.extract_lane_u $push95=, $0, 4
+; CHECK-NEXT:    i32.const $push345=, 1
+; CHECK-NEXT:    i32.and $push96=, $pop95, $pop345
+; CHECK-NEXT:    i32.const $push344=, 4
+; CHECK-NEXT:    i32.shl $push98=, $pop96, $pop344
+; CHECK-NEXT:    i32.or $push99=, $pop94, $pop98
+; CHECK-NEXT:    i8x16.extract_lane_u $push100=, $0, 5
+; CHECK-NEXT:    i32.const $push343=, 1
+; CHECK-NEXT:    i32.and $push101=, $pop100, $pop343
+; CHECK-NEXT:    i32.const $push342=, 5
+; CHECK-NEXT:    i32.shl $push103=, $pop101, $pop342
+; CHECK-NEXT:    i32.or $push104=, $pop99, $pop103
+; CHECK-NEXT:    i8x16.extract_lane_u $push105=, $0, 6
+; CHECK-NEXT:    i32.const $push341=, 1
+; CHECK-NEXT:    i32.and $push106=, $pop105, $pop341
+; CHECK-NEXT:    i32.const $push340=, 6
+; CHECK-NEXT:    i32.shl $push108=, $pop106, $pop340
+; CHECK-NEXT:    i32.or $push109=, $pop104, $pop108
+; CHECK-NEXT:    i8x16.extract_lane_u $push110=, $0, 7
+; CHECK-NEXT:    i32.const $push339=, 1
+; CHECK-NEXT:    i32.and $push111=, $pop110, $pop339
+; CHECK-NEXT:    i32.const $push338=, 7
+; CHECK-NEXT:    i32.shl $push113=, $pop111, $pop338
+; CHECK-NEXT:    i32.or $push114=, $pop109, $pop113
+; CHECK-NEXT:    i8x16.extract_lane_u $push115=, $0, 8
+; CHECK-NEXT:    i32.const $push337=, 1
+; CHECK-NEXT:    i32.and $push116=, $pop115, $pop337
+; CHECK-NEXT:    i32.const $push336=, 8
+; CHECK-NEXT:    i32.shl $push118=, $pop116, $pop336
+; CHECK-NEXT:    i32.or $push119=, $pop114, $pop118
+; CHECK-NEXT:    i8x16.extract_lane_u $push120=, $0, 9
+; CHECK-NEXT:    i32.const $push335=, 1
+; CHECK-NEXT:    i32.and $push121=, $pop120, $pop335
+; CHECK-NEXT:    i32.const $push334=, 9
+; CHECK-NEXT:    i32.shl $push123=, $pop121, $pop334
+; CHECK-NEXT:    i32.or $push124=, $pop119, $pop123
+; CHECK-NEXT:    i8x16.extract_lane_u $push125=, $0, 10
+; CHECK-NEXT:    i32.const $push333=, 1
+; CHECK-NEXT:    i32.and $push126=, $pop125, $pop333
+; CHECK-NEXT:    i32.const $push332=, 10
+; CHECK-NEXT:    i32.shl $push128=, $pop126, $pop332
+; CHECK-NEXT:    i32.or $push129=, $pop124, $pop128
+; CHECK-NEXT:    i8x16.extract_lane_u $push130=, $0, 11
+; CHECK-NEXT:    i32.const $push331=, 1
+; CHECK-NEXT:    i32.and $push131=, $pop130, $pop331
+; CHECK-NEXT:    i32.const $push330=, 11
+; CHECK-NEXT:    i32.shl $push133=, $pop131, $pop330
+; CHECK-NEXT:    i32.or $push134=, $pop129, $pop133
+; CHECK-NEXT:    i8x16.extract_lane_u $push135=, $0, 12
+; CHECK-NEXT:    i32.const $push329=, 1
+; CHECK-NEXT:    i32.and $push136=, $pop135, $pop329
+; CHECK-NEXT:    i32.const $push328=, 12
+; CHECK-NEXT:    i32.shl $push138=, $pop136, $pop328
+; CHECK-NEXT:    i32.or $push139=, $pop134, $pop138
+; CHECK-NEXT:    i8x16.extract_lane_u $push140=, $0, 13
+; CHECK-NEXT:    i32.const $push327=, 1
+; CHECK-NEXT:    i32.and $push141=, $pop140, $pop327
+; CHECK-NEXT:    i32.const $push326=, 13
+; CHECK-NEXT:    i32.shl $push143=, $pop141, $pop326
+; CHECK-NEXT:    i32.or $push144=, $pop139, $pop143
+; CHECK-NEXT:    i8x16.extract_lane_u $push145=, $0, 14
+; CHECK-NEXT:    i32.const $push325=, 1
+; CHECK-NEXT:    i32.and $push146=, $pop145, $pop325
+; CHECK-NEXT:    i32.const $push324=, 14
+; CHECK-NEXT:    i32.shl $push148=, $pop146, $pop324
+; CHECK-NEXT:    i32.or $push149=, $pop144, $pop148
+; CHECK-NEXT:    i8x16.extract_lane_u $push150=, $0, 15
+; CHECK-NEXT:    i32.const $push323=, 15
+; CHECK-NEXT:    i32.shl $push152=, $pop150, $pop323
+; CHECK-NEXT:    i32.or $push153=, $pop149, $pop152
+; CHECK-NEXT:    i32.const $push322=, 65535
+; CHECK-NEXT:    i32.and $push155=, $pop153, $pop322
+; CHECK-NEXT:    i8x16.eq $push321=, $3, $4
+; CHECK-NEXT:    local.tee $push320=, $0=, $pop321
+; CHECK-NEXT:    i8x16.extract_lane_u $push75=, $pop320, 15
+; CHECK-NEXT:    i32.const $push319=, 31
+; CHECK-NEXT:    i32.shl $push77=, $pop75, $pop319
+; CHECK-NEXT:    i8x16.extract_lane_u $push70=, $0, 14
+; CHECK-NEXT:    i32.const $push318=, 1
+; CHECK-NEXT:    i32.and $push71=, $pop70, $pop318
+; CHECK-NEXT:    i32.const $push317=, 30
+; CHECK-NEXT:    i32.shl $push73=, $pop71, $pop317
+; CHECK-NEXT:    i8x16.extract_lane_u $push65=, $0, 13
+; CHECK-NEXT:    i32.const $push316=, 1
+; CHECK-NEXT:    i32.and $push66=, $pop65, $pop316
+; CHECK-NEXT:    i32.const $push315=, 29
+; CHECK-NEXT:    i32.shl $push68=, $pop66, $pop315
+; CHECK-NEXT:    i8x16.extract_lane_u $push60=, $0, 12
+; CHECK-NEXT:    i32.const $push314=, 1
+; CHECK-NEXT:    i32.and $push61=, $pop60, $pop314
+; CHECK-NEXT:    i32.const $push313=, 28
+; CHECK-NEXT:    i32.shl $push63=, $pop61, $pop313
+; CHECK-NEXT:    i8x16.extract_lane_u $push55=, $0, 11
+; CHECK-NEXT:    i32.const $push312=, 1
+; CHECK-NEXT:    i32.and $push56=, $pop55, $pop312
+; CHECK-NEXT:    i32.const $push311=, 27
+; CHECK-NEXT:    i32.shl $push58=, $pop56, $pop311
+; CHECK-NEXT:    i8x16.extract_lane_u $push50=, $0, 10
+; CHECK-NEXT:    i32.const $push310=, 1
+; CHECK-NEXT:    i32.and $push51=, $pop50, $pop310
+; CHECK-NEXT:    i32.const $push309=, 26
+; CHECK-NEXT:    i32.shl $push53=, $pop51, $pop309
+; CHECK-NEXT:    i8x16.extract_lane_u $push45=, $0, 9
+; CHECK-NEXT:    i32.const $push308=, 1
+; CHECK-NEXT:    i32.and $push46=, $pop45, $pop308
+; CHECK-NEXT:    i32.const $push307=, 25
+; CHECK-NEXT:    i32.shl $push48=, $pop46, $pop307
+; CHECK-NEXT:    i8x16.extract_lane_u $push40=, $0, 8
+; CHECK-NEXT:    i32.const $push306=, 1
+; CHECK-NEXT:    i32.and $push41=, $pop40, $pop306
+; CHECK-NEXT:    i32.const $push305=, 24
+; CHECK-NEXT:    i32.shl $push43=, $pop41, $pop305
+; CHECK-NEXT:    i8x16.extract_lane_u $push35=, $0, 7
+; CHECK-NEXT:    i32.const $push304=, 1
+; CHECK-NEXT:    i32.and $push36=, $pop35, $pop304
+; CHECK-NEXT:    i32.const $push303=, 23
+; CHECK-NEXT:    i32.shl $push38=, $pop36, $pop303
+; CHECK-NEXT:    i8x16.extract_lane_u $push30=, $0, 6
+; CHECK-NEXT:    i32.const $push302=, 1
+; CHECK-NEXT:    i32.and $push31=, $pop30, $pop302
+; CHECK-NEXT:    i32.const $push301=, 22
+; CHECK-NEXT:    i32.shl $push33=, $pop31, $pop301
+; CHECK-NEXT:    i8x16.extract_lane_u $push25=, $0, 5
+; CHECK-NEXT:    i32.const $push300=, 1
+; CHECK-NEXT:    i32.and $push26=, $pop25, $pop300
+; CHECK-NEXT:    i32.const $push299=, 21
+; CHECK-NEXT:    i32.shl $push28=, $pop26, $pop299
+; CHECK-NEXT:    i8x16.extract_lane_u $push20=, $0, 4
+; CHECK-NEXT:    i32.const $push298=, 1
+; CHECK-NEXT:    i32.and $push21=, $pop20, $pop298
+; CHECK-NEXT:    i32.const $push297=, 20
+; CHECK-NEXT:    i32.shl $push23=, $pop21, $pop297
+; CHECK-NEXT:    i8x16.extract_lane_u $push15=, $0, 3
+; CHECK-NEXT:    i32.const $push296=, 1
+; CHECK-NEXT:    i32.and $push16=, $pop15, $pop296
+; CHECK-NEXT:    i32.const $push295=, 19
+; CHECK-NEXT:    i32.shl $push18=, $pop16, $pop295
+; CHECK-NEXT:    i8x16.extract_lane_u $push10=, $0, 2
+; CHECK-NEXT:    i32.const $push294=, 1
+; CHECK-NEXT:    i32.and $push11=, $pop10, $pop294
+; CHECK-NEXT:    i32.const $push293=, 18
+; CHECK-NEXT:    i32.shl $push13=, $pop11, $pop293
+; CHECK-NEXT:    i8x16.extract_lane_u $push5=, $0, 1
+; CHECK-NEXT:    i32.const $push292=, 1
+; CHECK-NEXT:    i32.and $push6=, $pop5, $pop292
+; CHECK-NEXT:    i32.const $push291=, 17
+; CHECK-NEXT:    i32.shl $push8=, $pop6, $pop291
+; CHECK-NEXT:    i8x16.extract_lane_u $push0=, $0, 0
+; CHECK-NEXT:    i32.const $push290=, 1
+; CHECK-NEXT:    i32.and $push2=, $pop0, $pop290
+; CHECK-NEXT:    i32.const $push289=, 16
+; CHECK-NEXT:    i32.shl $push4=, $pop2, $pop289
+; CHECK-NEXT:    i32.or $push9=, $pop8, $pop4
+; CHECK-NEXT:    i32.or $push14=, $pop13, $pop9
+; CHECK-NEXT:    i32.or $push19=, $pop18, $pop14
+; CHECK-NEXT:    i32.or $push24=, $pop23, $pop19
+; CHECK-NEXT:    i32.or $push29=, $pop28, $pop24
+; CHECK-NEXT:    i32.or $push34=, $pop33, $pop29
+; CHECK-NEXT:    i32.or $push39=, $pop38, $pop34
+; CHECK-NEXT:    i32.or $push44=, $pop43, $pop39
+; CHECK-NEXT:    i32.or $push49=, $pop48, $pop44
+; CHECK-NEXT:    i32.or $push54=, $pop53, $pop49
+; CHECK-NEXT:    i32.or $push59=, $pop58, $pop54
+; CHECK-NEXT:    i32.or $push64=, $pop63, $pop59
+; CHECK-NEXT:    i32.or $push69=, $pop68, $pop64
+; CHECK-NEXT:    i32.or $push74=, $pop73, $pop69
+; CHECK-NEXT:    i32.or $push78=, $pop77, $pop74
+; CHECK-NEXT:    i32.or $push156=, $pop155, $pop78
+; CHECK-NEXT:    i64.extend_i32_u $push157=, $pop156
+; CHECK-NEXT:    i64.const $push158=, 32
+; CHECK-NEXT:    i64.shl $push159=, $pop157, $pop158
+; CHECK-NEXT:    i64.or $push286=, $pop285, $pop159
+; CHECK-NEXT:    return $pop286
+    %z = icmp eq <64 x i8> %x, splat (i8 64) ; lane-wise equality against 64, yielding <64 x i1>
+    %res = bitcast <64 x i1> %z to i64 ; reinterpret the 64 lane bits as one i64 mask
+    ret i64 %res
+}
+
+define i64 @optimize_illegal_bitcast_v64i4(<64 x i4> %x) {
+; CHECK-LABEL: optimize_illegal_bitcast_v64i4:
+; CHECK:         .functype optimize_illegal_bitcast_v64i4 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> (i64)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    global.get $push355=, __stack_pointer
+; CHECK-NEXT:    i64.const $push356=, 16
+; CHECK-NEXT:    i64.sub $drop=, $pop355, $pop356
+; CHECK-NEXT:    i8x16.splat $push273=, $0
+; CHECK-NEXT:    i8x16.replace_lane $push274=, $pop273, 1, $1
+; CHECK-NEXT:    i8x16.replace_lane $push275=, $pop274, 2, $2
+; CHECK-NEXT:    i8x16.replace_lane $push276=, $pop275, 3, $3
+; CHECK-NEXT:    i8x16.replace_lane $push277=, $pop276, 4, $4
+; CHECK-NEXT:    i8x16.replace_lane $push278=, $pop277, 5, $5
+; CHECK-NEXT:    i8x16.replace_lane $push279=, $pop278, 6, $6
+; CHECK-NEXT:    i8x16.replace_lane $push280=, $pop279, 7, $7
+; CHECK-NEXT:    i8x16.replace_lane $push281=, $pop280, 8, $8
+; CHECK-NEXT:    i8x16.replace_lane $push282=, $pop281, 9, $9
+; CHECK-NEXT:    i8x16.replace_lane $push283=, $pop282, 10, $10
+; CHECK-NEXT:    i8x16.replace_lane $push284=, $pop283, 11, $11
+; CHECK-NEXT:    i8x16.replace_lane $push285=, $pop284, 12, $12
+; CHECK-NEXT:    i8x16.replace_lane $push286=, $pop285, 13, $13
+; CHECK-NEXT:    i8x16.replace_lane $push287=, $pop286, 14, $14
+; CHECK-NEXT:    i8x16.replace_lane $push288=, $pop287, 15, $15
+; CHECK-NEXT:    v128.const $push460=, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15
+; CHECK-NEXT:    local.tee $push459=, $64=, $pop460
+; CHECK-NEXT:    v128.and $push289=, $pop288, $pop459
+; CHECK-NEXT:    v128.const $push458=, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT:    local.tee $push457=, $65=, $pop458
+; CHECK-NEXT:    i8x16.eq $push456=, $pop289, $pop457
+; CHECK-NEXT:    local.tee $push455=, $66=, $pop456
+; CHECK-NEXT:    i8x16.extract_lane_u $push290=, $pop455, 0
+; CHECK-NEXT:    i32.const $push18=, 1
+; CHECK-NEXT:    i32.and $push291=, $pop290, $pop18
+; CHECK-NEXT:    i8x16.extract_lane_u $push292=, $66, 1
+; CHECK-NEXT:    i32.const $push454=, 1
+; CHECK-NEXT:    i32.and $push293=, $pop292, $pop454
+; CHECK-NEXT:    i32.const $push453=, 1
+; CHECK-NEXT:    i32.shl $push294=, $pop293, $pop453
+; CHECK-NEXT:    i32.or $push295=, $pop291, $pop294
+; CHECK-NEXT:    i8x16.extract_lane_u $push296=, $66, 2
+; CHECK-NEXT:    i32.const $push452=, 1
+; CHECK-NEXT:    i32.and $push297=, $pop296, $pop452
+; CHECK-NEXT:    i32.const $push121=, 2
+; CHECK-NEXT:    i32.shl $push298=, $pop297, $pop121
+; CHECK-NEXT:    i32.or $push299=, $pop295, $pop298
+; CHECK-NEXT:    i8x16.extract_lane_u $push300=, $66, 3
+; CHECK-NEXT:    i32.const $push451=, 1
+; CHECK-NEXT:    i32.and $push301=, $pop300, $pop451
+; CHECK-NEXT:    i32.const $push126=, 3
+; CHECK-NEXT:    i32.shl $push302=, $pop301, $pop126
+; CHECK-NEXT:    i32.or $push303=, $pop299, $pop302
+; CHECK-NEXT:    i8x16.extract_lane_u $push304=, $66, 4
+; CHECK-NEXT:    i32.const $push450=, 1
+; CHECK-NEXT:    i32.and $push305=, $pop304, $pop450
+; CHECK-NEXT:    i32.const $push131=, 4
+; CHECK-NEXT:    i32.shl $push306=, $pop305, $pop131
+; CHECK-NEXT:    i32.or $push307=, $pop303, $pop306
+; CHECK-NEXT:    i8x16.extract_lane_u $push308=, $66, 5
+; CHECK-NEXT:    i32.const $push449=, 1
+; CHECK-NEXT:    i32.and $push309=, $pop308, $pop449
+; CHECK-NEXT:    i32.const $push136=, 5
+; CHECK-NEXT:    i32.shl $push310=, $pop309, $pop136
+; CHECK-NEXT:    i32.or $push311=, $pop307, $pop310
+; CHECK-NEXT:    i8x16.extract_lane_u $push312=, $66, 6
+; CHECK-NEXT:    i32.const $push448=, 1
+; CHECK-NEXT:    i32.and $push313=, $pop312, $pop448
+; CHECK-NEXT:    i32.const $push141=, 6
+; CHECK-NEXT:    i32.shl $push314=, $pop313, $pop141
+; CHECK-NEXT:    i32.or $push315=, $pop311, $pop314
+; CHECK-NEXT:    i8x16.extract_lane_u $push316=, $66, 7
+; CHECK-NEXT:    i32.const $push447=, 1
+; CHECK-NEXT:    i32.and $push317=, $pop316, $pop447
+; CHECK-NEXT:    i32.const $push146=, 7
+; CHECK-NEXT:    i32.shl $push318=, $pop317, $pop146
+; CHECK-NEXT:    i32.or $push319=, $pop315, $pop318
+; CHECK-NEXT:    i8x16.extract_lane_u $push320=, $66, 8
+; CHECK-NEXT:    i32.const $push446=, 1
+; CHECK-NEXT:    i32.and $push321=, $pop320, $pop446
+; CHECK-NEXT:    i32.const $push151=, 8
+; CHECK-NEXT:    i32.shl $push322=, $pop321, $pop151
+; CHECK-NEXT:    i32.or $push323=, $pop319, $pop322
+; CHECK-NEXT:    i8x16.extract_lane_u $push324=, $66, 9
+; CHECK-NEXT:    i32.const $push445=, 1
+; CHECK-NEXT:    i32.and $push325=, $pop324, $pop445
+; CHECK-NEXT:    i32.const $push156=, 9
+; CHECK-NEXT:    i32.shl $push326=, $pop325, $pop156
+; CHECK-NEXT:    i32.or $push327=, $pop323, $pop326
+; CHECK-NEXT:    i8x16.extract_lane_u $push328=, $66, 10
+; CHECK-NEXT:    i32.const $push444=, 1
+; CHECK-NEXT:    i32.and $push329=, $pop328, $pop444
+; CHECK-NEXT:    i32.const $push161=, 10
+; CHECK-NEXT:    i32.shl $push330=, $pop329, $pop161
+; CHECK-NEXT:    i32.or $push331=, $pop327, $pop330
+; CHECK-NEXT:    i8x16.extract_lane_u $push332=, $66, 11
+; CHECK-NEXT:    i32.const $push443=, 1
+; CHECK-NEXT:    i32.and $push333=, $pop332, $pop443
+; CHECK-NEXT:    i32.const $push166=, 11
+; CHECK-NEXT:    i32.shl $push334=, $pop333, $pop166
+; CHECK-NEXT:    i32.or $push335=, $pop331, $pop334
+; CHECK-NEXT:    i8x16.extract_lane_u $push336=, $66, 12
+; CHECK-NEXT:    i32.const $push442=, 1
+; CHECK-NEXT:    i32.and $push337=, $pop336, $pop442
+; CHECK-NEXT:    i32.const $push171=, 12
+; CHECK-NEXT:    i32.shl $push338=, $pop337, $pop171
+; CHECK-NEXT:    i32.or $push339=, $pop335, $pop338
+; CHECK-NEXT:    i8x16.extract_lane_u $push340=, $66, 13
+; CHECK-NEXT:    i32.const $push441=, 1
+; CHECK-NEXT:    i32.and $push341=, $pop340, $pop441
+; CHECK-NEXT:    i32.const $push176=, 13
+; CHECK-NEXT:    i32.shl $push342=, $pop341, $pop176
+; CHECK-NEXT:    i32.or $push343=, $pop339, $pop342
+; CHECK-NEXT:    i8x16.extract_lane_u $push344=, $66, 14
+; CHECK-NEXT:    i32.const $push440=, 1
+; CHECK-NEXT:    i32.and $push345=, $pop344, $pop440
+; CHECK-NEXT:    i32.const $push181=, 14
+; CHECK-NEXT:    i32.shl $push346=, $pop345, $pop181
+; CHECK-NEXT:    i32.or $push347=, $pop343, $pop346
+; CHECK-NEXT:    i8x16.extract_lane_u $push348=, $66, 15
+; CHECK-NEXT:    i32.const $push185=, 15
+; CHECK-NEXT:    i32.shl $push349=, $pop348, $pop185
+; CHECK-NEXT:    i32.or $push350=, $pop347, $pop349
+; CHECK-NEXT:    i32.const $push188=, 65535
+; CHECK-NEXT:    i32.and $push351=, $pop350, $pop188
+; CHECK-NEXT:    i8x16.splat $push194=, $16
+; CHECK-NEXT:    i8x16.replace_lane $push195=, $pop194, 1, $17
+; CHECK-NEXT:    i8x16.replace_lane $push196=, $pop195, 2, $18
+; CHECK-NEXT:    i8x16.replace_lane $push197=, $pop196, 3, $19
+; CHECK-NEXT:    i8x16.replace_lane $push198=, $pop197, 4, $20
+; CHECK-NEXT:    i8x16.replace_lane $push199=, $pop198, 5, $21
+; CHECK-NEXT:    i8x16.replace_lane $push200=, $pop199, 6, $22
+; CHECK-NEXT:    i8x16.replace_lane $push201=, $pop200, 7, $23
+; CHECK-NEXT:    i8x16.replace_lane $push202=, $pop201, 8, $24
+; CHECK-NEXT:    i8x16.replace_lane $push203=, $pop202, 9, $25
+; CHECK-NEXT:    i8x16.replace_lane $push204=, $pop203, 10, $26
+; CHECK-NEXT:    i8x16.replace_lane $push205=, $pop204, 11, $27
+; CHECK-NEXT:    i8x16.replace_lane $push206=, $pop205, 12, $28
+; CHECK-NEXT:    i8x16.replace_lane $push207=, $pop206, 13, $29
+; CHECK-NEXT:    i8x16.replace_lane $push208=, $pop207, 14, $30
+; CHECK-NEXT:    i8x16.replace_lane $push209=, $pop208, 15, $31
+; CHECK-NEXT:    v128.and $push210=, $pop209, $64
+; CHECK-NEXT:    i8x16.eq $push439=, $pop210, $65
+; CHECK-NEXT:    local.tee $push438=, $66=, $pop439
+; CHECK-NEXT:    i8x16.extract_lane_u $push270=, $pop438, 15
+; CHECK-NEXT:    i32.const $push93=, 31
+; CHECK-NEXT:    i32.shl $push271=, $pop270, $pop93
+; CHECK-NEXT:    i8x16.extract_lane_u $push266=, $66, 14
+; CHECK-NEXT:    i32.const $push437=, 1
+; CHECK-NEXT:    i32.and $push267=, $pop266, $pop437
+; CHECK-NEXT:    i32.const $push89=, 30
+; CHECK-NEXT:    i32.shl $push268=, $pop267, $pop89
+; CHECK-NEXT:    i8x16.extract_lane_u $push262=, $66, 13
+; CHECK-NEXT:    i32.const $push436=, 1
+; CHECK-NEXT:    i32.and $push263=, $pop262, $pop436
+; CHECK-NEXT:    i32.const $push84=, 29
+; CHECK-NEXT:    i32.shl $push264=, $pop263, $pop84
+; CHECK-NEXT:    i8x16.extract_lane_u $push258=, $66, 12
+; CHECK-NEXT:    i32.const $push435=, 1
+; CHECK-NEXT:    i32.and $push259=, $pop258, $pop435
+; CHECK-NEXT:    i32.const $push79=, 28
+; CHECK-NEXT:    i32.shl $push260=, $pop259, $pop79
+; CHECK-NEXT:    i8x16.extract_lane_u $push254=, $66, 11
+; CHECK-NEXT:    i32.const $push434=, 1
+; CHECK-NEXT:    i32.and $push255=, $pop254, $pop434
+; CHECK-NEXT:    i32.const $push74=, 27
+; CHECK-NEXT:    i32.shl $push256=, $pop255, $pop74
+; CHECK-NEXT:    i8x16.extract_lane_u $push250=, $66, 10
+; CHECK-NEXT:    i32.const $push433=, 1
+; CHECK-NEXT:    i32.and $push251=, $pop250, $pop433
+; CHECK-NEXT:    i32.const $push69=, 26
+; CHECK-NEXT:    i32.shl $push252=, $pop251, $pop69
+; CHECK-NEXT:    i8x16.extract_lane_u $push246=, $66, 9
+; CHECK-NEXT:    i32.const $push432=, 1
+; CHECK-NEXT:    i32.and $push247=, $pop246, $pop432
+; CHECK-NEXT:    i32.const $push64=, 25
+; CHECK-NEXT:    i32.shl $push248=, $pop247, $pop64
+; CHECK-NEXT:    i8x16.extract_lane_u $push242=, $66, 8
+; CHECK-NEXT:    i32.const $push431=, 1
+; CHECK-NEXT:    i32.and $push243=, $pop242, $pop431
+; CHECK-NEXT:    i32.const $push59=, 24
+; CHECK-NEXT:    i32.shl $push244=, $pop243, $pop59
+; CHECK-NEXT:    i8x16.extract_lane_u $push238=, $66, 7
+; CHECK-NEXT:    i32.const $push430=, 1
+; CHECK-NEXT:    i32.and $push239=, $pop238, $pop430
+; CHECK-NEXT:    i32.const $push54=, 23
+; CHECK-NEXT:    i32.shl $push240=, $pop239, $pop54
+; CHECK-NEXT:    i8x16.extract_lane_u $push234=, $66, 6
+; CHECK-NEXT:    i32.const $push429=, 1
+; CHECK-NEXT:    i32.and $push235=, $pop234, $pop429
+; CHECK-NEXT:    i32.const $push49=, 22
+; CHECK-NEXT:    i32.shl $push236=, $pop235, $pop49
+; CHECK-NEXT:    i8x16.extract_lane_u $push230=, $66, 5
+; CHECK-NEXT:    i32.const $push428=, 1
+; CHECK-NEXT:    i32.and $push231=, $pop230, $pop428
+; CHECK-NEXT:    i32.const $push44=, 21
+; CHECK-NEXT:    i32.shl $push232=, $pop231, $pop44
+; CHECK-NEXT:    i8x16.extract_lane_u $push226=, $66, 4
+; CHECK-NEXT:    i32.const $push427=, 1
+; CHECK-NEXT:    i32.and $push227=, $pop226, $pop427
+; CHECK-NEXT:    i32.const $push39=, 20
+; CHECK-NEXT:    i32.shl $push228=, $pop227, $pop39
+; CHECK-NEXT:    i8x16.extract_lane_u $push222=, $66, 3
+; CHECK-NEXT:    i32.const $push426=, 1
+; CHECK-NEXT:    i32.and $push223=, $pop222, $pop426
+; CHECK-NEXT:    i32.const $push34=, 19
+; CHECK-NEXT:    i32.shl $push224=, $pop223, $pop34
+; CHECK-NEXT:    i8x16.extract_lane_u $push218=, $66, 2
+; CHECK-NEXT:    i32.const $push425=, 1
+; CHECK-NEXT:    i32.and $push219=, $pop218, $pop425
+; CHECK-NEXT:    i32.const $push29=, 18
+; CHECK-NEXT:    i32.shl $push220=, $pop219, $pop29
+; CHECK-NEXT:    i8x16.extract_lane_u $push214=, $66, 1
+; CHECK-NEXT:    i32.const $push424=, 1
+; CHECK-NEXT:    i32.and $push215=, $pop214, $pop424
+; CHECK-NEXT:    i32.const $push24=, 17
+; CHECK-NEXT:    i32.shl $push216=, $pop215, $pop24
+; CHECK-NEXT:    i8x16.extract_lane_u $push211=, $66, 0
+; CHECK-NEXT:    i32.const $push423=, 1
+; CHECK-NEXT:    i32.and $push212=, $pop211, $pop423
+; CHECK-NEXT:    i32.const $push20=, 16
+; CHECK-NEXT:    i32.shl $push213=, $pop212, $pop20
+; CHECK-NEXT:    i32.or $push217=, $pop216, $pop213
+; CHECK-NEXT:    i32.or $push221=, $pop220, $pop217
+; CHECK-NEXT:    i32.or $push225=, $pop224, $pop221
+; CHECK-NEXT:    i32.or $push229=, $pop228, $pop225
+; CHECK-NEXT:    i32.or $push233=, $pop232, $pop229
+; CHECK-NEXT:    i32.or $push237=, $pop236, $pop233
+; CHECK-NEXT:    i32.or $push241=, $pop240, $pop237
+; CHECK-NEXT:    i32.or $push245=, $pop244, $pop241
+; CHECK-NEXT:    i32.or $push249=, $pop248, $pop245
+; CHECK-NEXT:    i32.or $push253=, $pop252, $pop249
+; CHECK-NEXT:    i32.or $push257=, $pop256, $pop253
+; CHECK-NEXT:    i32.or $push261=, $pop260, $pop257
+; CHECK-NEXT:    i32.or $push265=, $pop264, $pop261
+; CHECK-NEXT:    i32.or $push269=, $pop268, $pop265
+; CHECK-NEXT:    i32.or $push272=, $pop271, $pop269
+; CHECK-NEXT:    i32.or $push352=, $pop351, $pop272
+; CHECK-NEXT:    i64.extend_i32_u $push353=, $pop352
+; CHECK-NEXT:    i8x16.splat $push96=, $32
+; CHECK-NEXT:    i8x16.replace_lane $push97=, $pop96, 1, $33
+; CHECK-NEXT:    i8x16.replace_lane $push98=, $pop97, 2, $34
+; CHECK-NEXT:    i8x16.replace_lane $push99=, $pop98, 3, $35
+; CHECK-NEXT:    i8x16.replace_lane $push100=, $pop99, 4, $36
+; CHECK-NEXT:    i8x16.replace_lane $push101=, $pop100, 5, $37
+; CHECK-NEXT:    i8x16.replace_lane $push102=, $pop101, 6, $38
+; CHECK-NEXT:    i8x16.replace_lane $push103=, $pop102, 7, $39
+; CHECK-NEXT:    i8x16.replace_lane $push104=, $pop103, 8, $40
+; CHECK-NEXT:    i8x16.replace_lane $push105=, $pop104, 9, $41
+; CHECK-NEXT:    i8x16.replace_lane $push106=, $pop105, 10, $42
+; CHECK-NEXT:    i8x16.replace_lane $push107=, $pop106, 11, $43
+; CHECK-NEXT:    i8x16.replace_lane $push108=, $pop107, 12, $44
+; CHECK-NEXT:    i8x16.replace_lane $push109=, $pop108, 13, $45
+; CHECK-NEXT:    i8x16.replace_lane $push110=, $pop109, 14, $46
+; CHECK-NEXT:    i8x16.replace_lane $push111=, $pop110, 15, $47
+; CHECK-NEXT:    v128.and $push112=, $pop111, $64
+; CHECK-NEXT:    i8x16.eq $push422=, $pop112, $65
+; CHECK-NEXT:    local.tee $push421=, $66=, $pop422
+; CHECK-NEXT:    i8x16.extract_lane_u $push113=, $pop421, 0
+; CHECK-NEXT:    i32.const $push420=, 1
+; CHECK-NEXT:    i32.and $push114=, $pop113, $pop420
+; CHECK-NEXT:    i8x16.extract_lane_u $push115=, $66, 1
+; CHECK-NEXT:    i32.const $push419=, 1
+; CHECK-NEXT:    i32.and $push116=, $pop115, $pop419
+; CHECK-NEXT:    i32.const $push418=, 1
+; CHECK-NEXT:    i32.shl $push117=, $pop116, $pop418
+; CHECK-NEXT:    i32.or $push118=, $pop114, $pop117
+; CHECK-NEXT:    i8x16.extract_lane_u $push119=, $66, 2
+; CHECK-NEXT:    i32.const $push417=, 1
+; CHECK-NEXT:    i32.and $push120=, $pop119, $pop417
+; CHECK-NEXT:    i32.const $push416=, 2
+; CHECK-NEXT:    i32.shl $push122=, $pop120, $pop416
+; CHECK-NEXT:    i32.or $push123=, $pop118, $pop122
+; CHECK-NEXT:    i8x16.extract_lane_u $push124=, $66, 3
+; CHECK-NEXT:    i32.const $push415=, 1
+; CHECK-NEXT:    i32.and $push125=, $pop124, $pop415
+; CHECK-NEXT:    i32.const $push414=, 3
+; CHECK-NEXT:    i32.shl $push127=, $pop125, $pop414
+; CHECK-NEXT:    i32.or $push128=, $pop123, $pop127
+; CHECK-NEXT:    i8x16.extract_lane_u $push129=, $66, 4
+; CHECK-NEXT:    i32.const $push413=, 1
+; CHECK-NEXT:    i32.and $push130=, $pop129, $pop413
+; CHECK-NEXT:    i32.const $push412=, 4
+; CHECK-NEXT:    i32.shl $push132=, $pop130, $pop412
+; CHECK-NEXT:    i32.or $push133=, $pop128, $pop132
+; CHECK-NEXT:    i8x16.extract_lane_u $push134=, $66, 5
+; CHECK-NEXT:    i32.const $push411=, 1
+; CHECK-NEXT:    i32.and $push135=, $pop134, $pop411
+; CHECK-NEXT:    i32.const $push410=, 5
+; CHECK-NEXT:    i32.shl $push137=, $pop135, $pop410
+; CHECK-NEXT:    i32.or $push138=, $pop133, $pop137
+; CHECK-NEXT:    i8x16.extract_lane_u $push139=, $66, 6
+; CHECK-NEXT:    i32.const $push409=, 1
+; CHECK-NEXT:    i32.and $push140=, $pop139, $pop409
+; CHECK-NEXT:    i32.const $push408=, 6
+; CHECK-NEXT:    i32.shl $push142=, $pop140, $pop408
+; CHECK-NEXT:    i32.or $push143=, $pop138, $pop142
+; CHECK-NEXT:    i8x16.extract_lane_u $push144=, $66, 7
+; CHECK-NEXT:    i32.const $push407=, 1
+; CHECK-NEXT:    i32.and $push145=, $pop144, $pop407
+; CHECK-NEXT:    i32.const $push406=, 7
+; CHECK-NEXT:    i32.shl $push147=, $pop145, $pop406
+; CHECK-NEXT:    i32.or $push148=, $pop143, $pop147
+; CHECK-NEXT:    i8x16.extract_lane_u $push149=, $66, 8
+; CHECK-NEXT:    i32.const $push405=, 1
+; CHECK-NEXT:    i32.and $push150=, $pop149, $pop405
+; CHECK-NEXT:    i32.const $push404=, 8
+; CHECK-NEXT:    i32.shl $push152=, $pop150, $pop404
+; CHECK-NEXT:    i32.or $push153=, $pop148, $pop152
+; CHECK-NEXT:    i8x16.extract_lane_u $push154=, $66, 9
+; CHECK-NEXT:    i32.const $push403=, 1
+; CHECK-NEXT:    i32.and $push155=, $pop154, $pop403
+; CHECK-NEXT:    i32.const $push402=, 9
+; CHECK-NEXT:    i32.shl $push157=, $pop155, $pop402
+; CHECK-NEXT:    i32.or $push158=, $pop153, $pop157
+; CHECK-NEXT:    i8x16.extract_lane_u $push159=, $66, 10
+; CHECK-NEXT:    i32.const $push401=, 1
+; CHECK-NEXT:    i32.and $push160=, $pop159, $pop401
+; CHECK-NEXT:    i32.const $push400=, 10
+; CHECK-NEXT:    i32.shl $push162=, $pop160, $pop400
+; CHECK-NEXT:    i32.or $push163=, $pop158, $pop162
+; CHECK-NEXT:    i8x16.extract_lane_u $push164=, $66, 11
+; CHECK-NEXT:    i32.const $push399=, 1
+; CHECK-NEXT:    i32.and $push165=, $pop164, $pop399
+; CHECK-NEXT:    i32.const $push398=, 11
+; CHECK-NEXT:    i32.shl $push167=, $pop165, $pop398
+; CHECK-NEXT:    i32.or $push168=, $pop163, $pop167
+; CHECK-NEXT:    i8x16.extract_lane_u $push169=, $66, 12
+; CHECK-NEXT:    i32.const $push397=, 1
+; CHECK-NEXT:    i32.and $push170=, $pop169, $pop397
+; CHECK-NEXT:    i32.const $push396=, 12
+; CHECK-NEXT:    i32.shl $push172=, $pop170, $pop396
+; CHECK-NEXT:    i32.or $push173=, $pop168, $pop172
+; CHECK-NEXT:    i8x16.extract_lane_u $push174=, $66, 13
+; CHECK-NEXT:    i32.const $push395=, 1
+; CHECK-NEXT:    i32.and $push175=, $pop174, $pop395
+; CHECK-NEXT:    i32.const $push394=, 13
+; CHECK-NEXT:    i32.shl $push177=, $pop175, $pop394
+; CHECK-NEXT:    i32.or $push178=, $pop173, $pop177
+; CHECK-NEXT:    i8x16.extract_lane_u $push179=, $66, 14
+; CHECK-NEXT:    i32.const $push393=, 1
+; CHECK-NEXT:    i32.and $push180=, $pop179, $pop393
+; CHECK-NEXT:    i32.const $push392=, 14
+; CHECK-NEXT:    i32.shl $push182=, $pop180, $pop392
+; CHECK-NEXT:    i32.or $push183=, $pop178, $pop182
+; CHECK-NEXT:    i8x16.extract_lane_u $push184=, $66, 15
+; CHECK-NEXT:    i32.const $push391=, 15
+; CHECK-NEXT:    i32.shl $push186=, $pop184, $pop391
+; CHECK-NEXT:    i32.or $push187=, $pop183, $pop186
+; CHECK-NEXT:    i32.const $push390=, 65535
+; CHECK-NEXT:    i32.and $push189=, $pop187, $pop390
+; CHECK-NEXT:    i8x16.splat $push0=, $48
+; CHECK-NEXT:    i8x16.replace_lane $push1=, $pop0, 1, $49
+; CHECK-NEXT:    i8x16.replace_lane $push2=, $pop1, 2, $50
+; CHECK-NEXT:    i8x16.replace_lane $push3=, $pop2, 3, $51
+; CHECK-NEXT:    i8x16.replace_lane $push4=, $pop3, 4, $52
+; CHECK-NEXT:    i8x16.replace_lane $push5=, $pop4, 5, $53
+; CHECK-NEXT:    i8x16.replace_lane $push6=, $pop5, 6, $54
+; CHECK-NEXT:    i8x16.replace_lane $push7=, $pop6, 7, $55
+; CHECK-NEXT:    i8x16.replace_lane $push8=, $pop7, 8, $56
+; CHECK-NEXT:    i8x16.replace_lane $push9=, $pop8, 9, $57
+; CHECK-NEXT:    i8x16.replace_lane $push10=, $pop9, 10, $58
+; CHECK-NEXT:    i8x16.replace_lane $push11=, $pop10, 11, $59
+; CHECK-NEXT:    i8x16.replace_lane $push12=, $pop11, 12, $60
+; CHECK-NEXT:    i8x16.replace_lane $push13=, $pop12, 13, $61
+; CHECK-NEXT:    i8x16.replace_lane $push14=, $pop13, 14, $62
+; CHECK-NEXT:    i8x16.replace_lane $push15=, $pop14, 15, $63
+; CHECK-NEXT:    v128.and $push16=, $pop15, $64
+; CHECK-NEXT:    i8x16.eq $push389=, $pop16, $65
+; CHECK-NEXT:    local.tee $push388=, $66=, $pop389
+; CHECK-NEXT:    i8x16.extract_lane_u $push92=, $pop388, 15
+; CHECK-NEXT:    i32.const $push387=, 31
+; CHECK-NEXT:    i32.shl $push94=, $pop92, $pop387
+; CHECK-NEXT:    i8x16.extract_lane_u $push87=, $66, 14
+; CHECK-NEXT:    i32.const $push386=, 1
+; CHECK-NEXT:    i32.and $push88=, $pop87, $pop386
+; CHECK-NEXT:    i32.const $push385=, 30
+; CHECK-NEXT:    i32.shl $push90=, $pop88, $pop385
+; CHECK-NEXT:    i8x16.extract_lane_u $push82=, $66, 13
+; CHECK-NEXT:    i32.const $push384=, 1
+; CHECK-NEXT:    i32.and $push83=, $pop82, $pop384
+; CHECK-NEXT:    i32.const $push383=, 29
+; CHECK-NEXT:    i32.shl $push85=, $pop83, $pop383
+; CHECK-NEXT:    i8x16.extract_lane_u $push77=, $66, 12
+; CHECK-NEXT:    i32.const $push382=, 1
+; CHECK-NEXT:    i32.and $push78=, $pop77, $pop382
+; CHECK-NEXT:    i32.const $push381=, 28
+; CHECK-NEXT:    i32.shl $push80=, $pop78, $pop381
+; CHECK-NEXT:    i8x16.extract_lane_u $push72=, $66, 11
+; CHECK-NEXT:    i32.const $push380=, 1
+; CHECK-NEXT:    i32.and $push73=, $pop72, $pop380
+; CHECK-NEXT:    i32.const $push379=, 27
+; CHECK-NEXT:    i32.shl $push75=, $pop73, $pop379
+; CHECK-NEXT:    i8x16.extract_lane_u $push67=, $66, 10
+; CHECK-NEXT:    i32.const $push378=, 1
+; CHECK-NEXT:    i32.and $push68=, $pop67, $pop378
+; CHECK-NEXT:    i32.const $push377=, 26
+; CHECK-NEXT:    i32.shl $push70=, $pop68, $pop377
+; CHECK-NEXT:    i8x16.extract_lane_u $push62=, $66, 9
+; CHECK-NEXT:    i32.const $push376=, 1
+; CHECK-NEXT:    i32.and $push63=, $pop62, $pop376
+; CHECK-NEXT:    i32.const $push375=, 25
+; CHECK-NEXT:    i32.shl $push65=, $pop63, $pop375
+; CHECK-NEXT:    i8x16.extract_lane_u $push57=, $66, 8
+; CHECK-NEXT:    i32.const $push374=, 1
+; CHECK-NEXT:    i32.and $push58=, $pop57, $pop374
+; CHECK-NEXT:    i32.const $push373=, 24
+; CHECK-NEXT:    i32.shl $push60=, $pop58, $pop373
+; CHECK-NEXT:    i8x16.extract_lane_u $push52=, $66, 7
+; CHECK-NEXT:    i32.const $push372=, 1
+; CHECK-NEXT:    i32.and $push53=, $pop52, $pop372
+; CHECK-NEXT:    i32.const $push371=, 23
+; CHECK-NEXT:    i32.shl $push55=, $pop53, $pop371
+; CHECK-NEXT:    i8x16.extract_lane_u $push47=, $66, 6
+; CHECK-NEXT:    i32.const $push370=, 1
+; CHECK-NEXT:    i32.and $push48=, $pop47, $pop370
+; CHECK-NEXT:    i32.const $push369=, 22
+; CHECK-NEXT:    i32.shl $push50=, $pop48, $pop369
+; CHECK-NEXT:    i8x16.extract_lane_u $push42=, $66, 5
+; CHECK-NEXT:    i32.const $push368=, 1
+; CHECK-NEXT:    i32.and $push43=, $pop42, $pop368
+; CHECK-NEXT:    i32.const $push367=, 21
+; CHECK-NEXT:    i32.shl $push45=, $pop43, $pop367
+; CHECK-NEXT:    i8x16.extract_lane_u $push37=, $66, 4
+; CHECK-NEXT:    i32.const $push366=, 1
+; CHECK-NEXT:    i32.and $push38=, $pop37, $pop366
+; CHECK-NEXT:    i32.const $push365=, 20
+; CHECK-NEXT:    i32.shl $push40=, $pop38, $pop365
+; CHECK-NEXT:    i8x16.extract_lane_u $push32=, $66, 3
+; CHECK-NEXT:    i32.const $push364=, 1
+; CHECK-NEXT:    i32.and $push33=, $pop32, $pop364
+; CHECK-NEXT:    i32.const $push363=, 19
+; CHECK-NEXT:    i32.shl $push35=, $pop33, $pop363
+; CHECK-NEXT:    i8x16.extract_lane_u $push27=, $66, 2
+; CHECK-NEXT:    i32.const $push362=, 1
+; CHECK-NEXT:    i32.and $push28=, $pop27, $pop362
+; CHECK-NEXT:    i32.const $push361=, 18
+; CHECK-NEXT:    i32.shl $push30=, $pop28, $pop361
+; CHECK-NEXT:    i8x16.extract_lane_u $push22=, $66, 1
+; CHECK-NEXT:    i32.const $push360=, 1
+; CHECK-NEXT:    i32.and $push23=, $pop22, $pop360
+; CHECK-NEXT:    i32.const $push359=, 17
+; CHECK-NEXT:    i32.shl $push25=, $pop23, $pop359
+; CHECK-NEXT:    i8x16.extract_lane_u $push17=, $66, 0
+; CHECK-NEXT:    i32.const $push358=, 1
+; CHECK-NEXT:    i32.and $push19=, $pop17, $pop358
+; CHECK-NEXT:    i32.const $push357=, 16
+; CHECK-NEXT:    i32.shl $push21=, $pop19, $pop357
+; CHECK-NEXT:    i32.or $push26=, $pop25, $pop21
+; CHECK-NEXT:    i32.or $push31=, $pop30, $pop26
+; CHECK-NEXT:    i32.or $push36=, $pop35, $pop31
+; CHECK-NEXT:    i32.or $push41=, $pop40, $pop36
+; CHECK-NEXT:    i32.or $push46=, $pop45, $pop41
+; CHECK-NEXT:    i32.or $push51=, $pop50, $pop46
+; CHECK-NEXT:    i32.or $push56=, $pop55, $pop51
+; CHECK-NEXT:    i32.or $push61=, $pop60, $pop56
+; CHECK-NEXT:    i32.or $push66=, $pop65, $pop61
+; CHECK-NEXT:    i32.or $push71=, $pop70, $pop66
+; CHECK-NEXT:    i32.or $push76=, $pop75, $pop71
+; CHECK-NEXT:    i32.or $push81=, $pop80, $pop76
+; CHECK-NEXT:    i32.or $push86=, $pop85, $pop81
+; CHECK-NEXT:    i32.or $push91=, $pop90, $pop86
+; CHECK-NEXT:    i32.or $push95=, $pop94, $pop91
+; CHECK-NEXT:    i32.or $push190=, $pop189, $pop95
+; CHECK-NEXT:    i64.extend_i32_u $push191=, $pop190
+; CHECK-NEXT:    i64.const $push192=, 32
+; CHECK-NEXT:    i64.shl $push193=, $pop191, $pop192
+; CHECK-NEXT:    i64.or $push354=, $pop353, $pop193
+; CHECK-NEXT:    return $pop354
+    %z = icmp eq <64 x i4> %x, splat (i4 64)
+    %res = bitcast <64 x i1> %z to i64
+    ret i64 %res
+}

>From 55f092a83776a34e8cf8b8e50c445ec2c38408f9 Mon Sep 17 00:00:00 2001
From: badumbatish <jjasmine at igalia.com>
Date: Mon, 23 Jun 2025 16:27:17 -0700
Subject: [PATCH 2/2] [WebAssembly] [Backend] Optimize illegal bitmask

---
 .../WebAssembly/WebAssemblyISelLowering.cpp   |  64 +-
 .../WebAssembly/simd-illegal-bitmask.ll       | 626 ++----------------
 2 files changed, 94 insertions(+), 596 deletions(-)

diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 3cd923c0ba058..51f9cbc03af2d 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -18,12 +18,14 @@
 #include "WebAssemblySubtarget.h"
 #include "WebAssemblyTargetMachine.h"
 #include "WebAssemblyUtilities.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SDPatternMatch.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/CodeGen/SelectionDAGNodes.h"
 #include "llvm/IR/DiagnosticInfo.h"
@@ -3214,20 +3216,26 @@ static SDValue performTruncateCombine(SDNode *N,
 
 static SDValue performBitcastCombine(SDNode *N,
                                      TargetLowering::DAGCombinerInfo &DCI) {
+  using namespace llvm::SDPatternMatch;
   auto &DAG = DCI.DAG;
   SDLoc DL(N);
   SDValue Src = N->getOperand(0);
   EVT VT = N->getValueType(0);
   EVT SrcVT = Src.getValueType();
 
-  // bitcast <N x i1> to iN
+  bool Vectorizable = DCI.isBeforeLegalize() && VT.isScalarInteger() &&
+                      SrcVT.isFixedLengthVector() &&
+                      SrcVT.getScalarType() == MVT::i1;
+
+  if (!Vectorizable)
+    return SDValue();
+
+  unsigned NumElts = SrcVT.getVectorNumElements();
+  EVT Width = MVT::getIntegerVT(128 / NumElts);
+
+  // bitcast <N x i1> to iN, where N = 2, 4, 8, 16 (legal)
   //   ==> bitmask
-  if (DCI.isBeforeLegalize() && VT.isScalarInteger() &&
-      SrcVT.isFixedLengthVector() && SrcVT.getScalarType() == MVT::i1) {
-    unsigned NumElts = SrcVT.getVectorNumElements();
-    if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
-      return SDValue();
-    EVT Width = MVT::getIntegerVT(128 / NumElts);
+  if (NumElts == 2 || NumElts == 4 || NumElts == 8 || NumElts == 16) {
     return DAG.getZExtOrTrunc(
         DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
                     {DAG.getConstant(Intrinsic::wasm_bitmask, DL, MVT::i32),
@@ -3236,6 +3244,48 @@ static SDValue performBitcastCombine(SDNode *N,
         DL, VT);
   }
 
+  // bitcast <N x i1>(setcc ...) to concat iN, where N = 32 and 64 (illegal)
+  if (NumElts == 32 || NumElts == 64) {
+    // Strategy: setcc every v16i8 chunk separately (v16i8 -> v16i1), bitcast
+    // each chunk mask to i16, zero-extend it to i32 or i64, and OR it into
+    // the result at bit offset 16 * chunk index.
+    SDValue Concat, SetCCVector;
+    ISD::CondCode SetCond;
+    if (!sd_match(N, m_BitCast(m_c_SetCC(m_Value(Concat),
+                                         m_VectorVT(m_Value(SetCCVector)),
+                                         m_CondCode(SetCond)))))
+      return SDValue();
+    // The per-chunk setcc below is v16i8 -> v16i1: require v16i8 operands.
+    if (Concat.getOpcode() != ISD::CONCAT_VECTORS ||
+        Concat.getOperand(0).getValueType() != MVT::v16i8)
+      return SDValue();
+    // Only a BUILD_VECTOR of constants is handled. NOTE(review): lane 0 is
+    // reused as the splat below; confirm all lanes are known to be equal.
+    if (!ISD::isBuildVectorOfConstantSDNodes(SetCCVector.getNode()))
+      return SDValue();
+    SDValue SplitSetCCVec =
+        DAG.getSplat(MVT::v16i8, DL, SetCCVector->ops().front());
+    SmallVector<SDValue> VectorsToShuffle;
+    for (SDValue V : Concat->ops())
+      VectorsToShuffle.push_back(DAG.getBitcast(
+          MVT::i16, DAG.getSetCC(DL, MVT::v16i1, V, SplitSetCCVec, SetCond)));
+
+    MVT ReturnType = VectorsToShuffle.size() == 2 ? MVT::i32 : MVT::i64;
+    SDValue ReturningInteger = DAG.getConstant(0, DL, ReturnType);
+    // Concat operand 0 holds lanes 0..15 and must land in the low 16 bits.
+    unsigned ShiftAmt = 0;
+    for (SDValue V : VectorsToShuffle) {
+      SDValue Chunk = DAG.getNode(
+          ISD::SHL, DL, ReturnType,
+          {DAG.getZExtOrTrunc(V, DL, ReturnType),
+           DAG.getShiftAmountConstant(ShiftAmt, ReturnType, DL)});
+      ReturningInteger =
+          DAG.getNode(ISD::OR, DL, ReturnType, {ReturningInteger, Chunk});
+      ShiftAmt += 16;
+    }
+    return ReturningInteger;
+  }
+
   return SDValue();
 }
 
diff --git a/llvm/test/CodeGen/WebAssembly/simd-illegal-bitmask.ll b/llvm/test/CodeGen/WebAssembly/simd-illegal-bitmask.ll
index 1715dc03c0917..58152afbfcb5a 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-illegal-bitmask.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-illegal-bitmask.ll
@@ -22,203 +22,18 @@ define i32 @optimize_illegal_bitcast_v32i8(<32 x i8> %x) {
 ; CHECK-LABEL: optimize_illegal_bitcast_v32i8:
 ; CHECK:         .functype optimize_illegal_bitcast_v32i8 (v128, v128) -> (i32)
 ; CHECK-NEXT:  # %bb.0:
-; CHECK-NEXT:    global.get $push157=, __stack_pointer
-; CHECK-NEXT:    i64.const $push158=, 16
-; CHECK-NEXT:    i64.sub $drop=, $pop157, $pop158
-; CHECK-NEXT:    v128.const $push194=, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32
-; CHECK-NEXT:    local.tee $push193=, $2=, $pop194
-; CHECK-NEXT:    i8x16.eq $push192=, $0, $pop193
-; CHECK-NEXT:    local.tee $push191=, $0=, $pop192
-; CHECK-NEXT:    i8x16.extract_lane_u $push79=, $pop191, 0
-; CHECK-NEXT:    i32.const $push1=, 1
-; CHECK-NEXT:    i32.and $push80=, $pop79, $pop1
-; CHECK-NEXT:    i8x16.extract_lane_u $push81=, $0, 1
-; CHECK-NEXT:    i32.const $push190=, 1
-; CHECK-NEXT:    i32.and $push82=, $pop81, $pop190
-; CHECK-NEXT:    i32.const $push189=, 1
-; CHECK-NEXT:    i32.shl $push83=, $pop82, $pop189
-; CHECK-NEXT:    i32.or $push84=, $pop80, $pop83
-; CHECK-NEXT:    i8x16.extract_lane_u $push85=, $0, 2
-; CHECK-NEXT:    i32.const $push188=, 1
-; CHECK-NEXT:    i32.and $push86=, $pop85, $pop188
-; CHECK-NEXT:    i32.const $push87=, 2
-; CHECK-NEXT:    i32.shl $push88=, $pop86, $pop87
-; CHECK-NEXT:    i32.or $push89=, $pop84, $pop88
-; CHECK-NEXT:    i8x16.extract_lane_u $push90=, $0, 3
-; CHECK-NEXT:    i32.const $push187=, 1
-; CHECK-NEXT:    i32.and $push91=, $pop90, $pop187
-; CHECK-NEXT:    i32.const $push92=, 3
-; CHECK-NEXT:    i32.shl $push93=, $pop91, $pop92
-; CHECK-NEXT:    i32.or $push94=, $pop89, $pop93
-; CHECK-NEXT:    i8x16.extract_lane_u $push95=, $0, 4
-; CHECK-NEXT:    i32.const $push186=, 1
-; CHECK-NEXT:    i32.and $push96=, $pop95, $pop186
-; CHECK-NEXT:    i32.const $push97=, 4
-; CHECK-NEXT:    i32.shl $push98=, $pop96, $pop97
-; CHECK-NEXT:    i32.or $push99=, $pop94, $pop98
-; CHECK-NEXT:    i8x16.extract_lane_u $push100=, $0, 5
-; CHECK-NEXT:    i32.const $push185=, 1
-; CHECK-NEXT:    i32.and $push101=, $pop100, $pop185
-; CHECK-NEXT:    i32.const $push102=, 5
-; CHECK-NEXT:    i32.shl $push103=, $pop101, $pop102
-; CHECK-NEXT:    i32.or $push104=, $pop99, $pop103
-; CHECK-NEXT:    i8x16.extract_lane_u $push105=, $0, 6
-; CHECK-NEXT:    i32.const $push184=, 1
-; CHECK-NEXT:    i32.and $push106=, $pop105, $pop184
-; CHECK-NEXT:    i32.const $push107=, 6
-; CHECK-NEXT:    i32.shl $push108=, $pop106, $pop107
-; CHECK-NEXT:    i32.or $push109=, $pop104, $pop108
-; CHECK-NEXT:    i8x16.extract_lane_u $push110=, $0, 7
-; CHECK-NEXT:    i32.const $push183=, 1
-; CHECK-NEXT:    i32.and $push111=, $pop110, $pop183
-; CHECK-NEXT:    i32.const $push112=, 7
-; CHECK-NEXT:    i32.shl $push113=, $pop111, $pop112
-; CHECK-NEXT:    i32.or $push114=, $pop109, $pop113
-; CHECK-NEXT:    i8x16.extract_lane_u $push115=, $0, 8
-; CHECK-NEXT:    i32.const $push182=, 1
-; CHECK-NEXT:    i32.and $push116=, $pop115, $pop182
-; CHECK-NEXT:    i32.const $push117=, 8
-; CHECK-NEXT:    i32.shl $push118=, $pop116, $pop117
-; CHECK-NEXT:    i32.or $push119=, $pop114, $pop118
-; CHECK-NEXT:    i8x16.extract_lane_u $push120=, $0, 9
-; CHECK-NEXT:    i32.const $push181=, 1
-; CHECK-NEXT:    i32.and $push121=, $pop120, $pop181
-; CHECK-NEXT:    i32.const $push122=, 9
-; CHECK-NEXT:    i32.shl $push123=, $pop121, $pop122
-; CHECK-NEXT:    i32.or $push124=, $pop119, $pop123
-; CHECK-NEXT:    i8x16.extract_lane_u $push125=, $0, 10
-; CHECK-NEXT:    i32.const $push180=, 1
-; CHECK-NEXT:    i32.and $push126=, $pop125, $pop180
-; CHECK-NEXT:    i32.const $push127=, 10
-; CHECK-NEXT:    i32.shl $push128=, $pop126, $pop127
-; CHECK-NEXT:    i32.or $push129=, $pop124, $pop128
-; CHECK-NEXT:    i8x16.extract_lane_u $push130=, $0, 11
-; CHECK-NEXT:    i32.const $push179=, 1
-; CHECK-NEXT:    i32.and $push131=, $pop130, $pop179
-; CHECK-NEXT:    i32.const $push132=, 11
-; CHECK-NEXT:    i32.shl $push133=, $pop131, $pop132
-; CHECK-NEXT:    i32.or $push134=, $pop129, $pop133
-; CHECK-NEXT:    i8x16.extract_lane_u $push135=, $0, 12
-; CHECK-NEXT:    i32.const $push178=, 1
-; CHECK-NEXT:    i32.and $push136=, $pop135, $pop178
-; CHECK-NEXT:    i32.const $push137=, 12
-; CHECK-NEXT:    i32.shl $push138=, $pop136, $pop137
-; CHECK-NEXT:    i32.or $push139=, $pop134, $pop138
-; CHECK-NEXT:    i8x16.extract_lane_u $push140=, $0, 13
-; CHECK-NEXT:    i32.const $push177=, 1
-; CHECK-NEXT:    i32.and $push141=, $pop140, $pop177
-; CHECK-NEXT:    i32.const $push142=, 13
-; CHECK-NEXT:    i32.shl $push143=, $pop141, $pop142
-; CHECK-NEXT:    i32.or $push144=, $pop139, $pop143
-; CHECK-NEXT:    i8x16.extract_lane_u $push145=, $0, 14
-; CHECK-NEXT:    i32.const $push176=, 1
-; CHECK-NEXT:    i32.and $push146=, $pop145, $pop176
-; CHECK-NEXT:    i32.const $push147=, 14
-; CHECK-NEXT:    i32.shl $push148=, $pop146, $pop147
-; CHECK-NEXT:    i32.or $push149=, $pop144, $pop148
-; CHECK-NEXT:    i8x16.extract_lane_u $push150=, $0, 15
-; CHECK-NEXT:    i32.const $push151=, 15
-; CHECK-NEXT:    i32.shl $push152=, $pop150, $pop151
-; CHECK-NEXT:    i32.or $push153=, $pop149, $pop152
-; CHECK-NEXT:    i32.const $push154=, 65535
-; CHECK-NEXT:    i32.and $push155=, $pop153, $pop154
-; CHECK-NEXT:    i8x16.eq $push175=, $1, $2
-; CHECK-NEXT:    local.tee $push174=, $0=, $pop175
-; CHECK-NEXT:    i8x16.extract_lane_u $push75=, $pop174, 15
-; CHECK-NEXT:    i32.const $push76=, 31
-; CHECK-NEXT:    i32.shl $push77=, $pop75, $pop76
-; CHECK-NEXT:    i8x16.extract_lane_u $push70=, $0, 14
-; CHECK-NEXT:    i32.const $push173=, 1
-; CHECK-NEXT:    i32.and $push71=, $pop70, $pop173
-; CHECK-NEXT:    i32.const $push72=, 30
-; CHECK-NEXT:    i32.shl $push73=, $pop71, $pop72
-; CHECK-NEXT:    i8x16.extract_lane_u $push65=, $0, 13
-; CHECK-NEXT:    i32.const $push172=, 1
-; CHECK-NEXT:    i32.and $push66=, $pop65, $pop172
-; CHECK-NEXT:    i32.const $push67=, 29
-; CHECK-NEXT:    i32.shl $push68=, $pop66, $pop67
-; CHECK-NEXT:    i8x16.extract_lane_u $push60=, $0, 12
-; CHECK-NEXT:    i32.const $push171=, 1
-; CHECK-NEXT:    i32.and $push61=, $pop60, $pop171
-; CHECK-NEXT:    i32.const $push62=, 28
-; CHECK-NEXT:    i32.shl $push63=, $pop61, $pop62
-; CHECK-NEXT:    i8x16.extract_lane_u $push55=, $0, 11
-; CHECK-NEXT:    i32.const $push170=, 1
-; CHECK-NEXT:    i32.and $push56=, $pop55, $pop170
-; CHECK-NEXT:    i32.const $push57=, 27
-; CHECK-NEXT:    i32.shl $push58=, $pop56, $pop57
-; CHECK-NEXT:    i8x16.extract_lane_u $push50=, $0, 10
-; CHECK-NEXT:    i32.const $push169=, 1
-; CHECK-NEXT:    i32.and $push51=, $pop50, $pop169
-; CHECK-NEXT:    i32.const $push52=, 26
-; CHECK-NEXT:    i32.shl $push53=, $pop51, $pop52
-; CHECK-NEXT:    i8x16.extract_lane_u $push45=, $0, 9
-; CHECK-NEXT:    i32.const $push168=, 1
-; CHECK-NEXT:    i32.and $push46=, $pop45, $pop168
-; CHECK-NEXT:    i32.const $push47=, 25
-; CHECK-NEXT:    i32.shl $push48=, $pop46, $pop47
-; CHECK-NEXT:    i8x16.extract_lane_u $push40=, $0, 8
-; CHECK-NEXT:    i32.const $push167=, 1
-; CHECK-NEXT:    i32.and $push41=, $pop40, $pop167
-; CHECK-NEXT:    i32.const $push42=, 24
-; CHECK-NEXT:    i32.shl $push43=, $pop41, $pop42
-; CHECK-NEXT:    i8x16.extract_lane_u $push35=, $0, 7
-; CHECK-NEXT:    i32.const $push166=, 1
-; CHECK-NEXT:    i32.and $push36=, $pop35, $pop166
-; CHECK-NEXT:    i32.const $push37=, 23
-; CHECK-NEXT:    i32.shl $push38=, $pop36, $pop37
-; CHECK-NEXT:    i8x16.extract_lane_u $push30=, $0, 6
-; CHECK-NEXT:    i32.const $push165=, 1
-; CHECK-NEXT:    i32.and $push31=, $pop30, $pop165
-; CHECK-NEXT:    i32.const $push32=, 22
-; CHECK-NEXT:    i32.shl $push33=, $pop31, $pop32
-; CHECK-NEXT:    i8x16.extract_lane_u $push25=, $0, 5
-; CHECK-NEXT:    i32.const $push164=, 1
-; CHECK-NEXT:    i32.and $push26=, $pop25, $pop164
-; CHECK-NEXT:    i32.const $push27=, 21
-; CHECK-NEXT:    i32.shl $push28=, $pop26, $pop27
-; CHECK-NEXT:    i8x16.extract_lane_u $push20=, $0, 4
-; CHECK-NEXT:    i32.const $push163=, 1
-; CHECK-NEXT:    i32.and $push21=, $pop20, $pop163
-; CHECK-NEXT:    i32.const $push22=, 20
-; CHECK-NEXT:    i32.shl $push23=, $pop21, $pop22
-; CHECK-NEXT:    i8x16.extract_lane_u $push15=, $0, 3
-; CHECK-NEXT:    i32.const $push162=, 1
-; CHECK-NEXT:    i32.and $push16=, $pop15, $pop162
-; CHECK-NEXT:    i32.const $push17=, 19
-; CHECK-NEXT:    i32.shl $push18=, $pop16, $pop17
-; CHECK-NEXT:    i8x16.extract_lane_u $push10=, $0, 2
-; CHECK-NEXT:    i32.const $push161=, 1
-; CHECK-NEXT:    i32.and $push11=, $pop10, $pop161
-; CHECK-NEXT:    i32.const $push12=, 18
-; CHECK-NEXT:    i32.shl $push13=, $pop11, $pop12
-; CHECK-NEXT:    i8x16.extract_lane_u $push5=, $0, 1
-; CHECK-NEXT:    i32.const $push160=, 1
-; CHECK-NEXT:    i32.and $push6=, $pop5, $pop160
-; CHECK-NEXT:    i32.const $push7=, 17
-; CHECK-NEXT:    i32.shl $push8=, $pop6, $pop7
-; CHECK-NEXT:    i8x16.extract_lane_u $push0=, $0, 0
-; CHECK-NEXT:    i32.const $push159=, 1
-; CHECK-NEXT:    i32.and $push2=, $pop0, $pop159
-; CHECK-NEXT:    i32.const $push3=, 16
+; CHECK-NEXT:    i32.const $push2=, 16
+; CHECK-NEXT:    v128.const $push10=, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32
+; CHECK-NEXT:    local.tee $push9=, $2=, $pop10
+; CHECK-NEXT:    i8x16.eq $push0=, $0, $pop9
+; CHECK-NEXT:    i8x16.bitmask $push1=, $pop0
+; CHECK-NEXT:    i32.const $push8=, 16
+; CHECK-NEXT:    i32.add $push3=, $pop1, $pop8
 ; CHECK-NEXT:    i32.shl $push4=, $pop2, $pop3
-; CHECK-NEXT:    i32.or $push9=, $pop8, $pop4
-; CHECK-NEXT:    i32.or $push14=, $pop13, $pop9
-; CHECK-NEXT:    i32.or $push19=, $pop18, $pop14
-; CHECK-NEXT:    i32.or $push24=, $pop23, $pop19
-; CHECK-NEXT:    i32.or $push29=, $pop28, $pop24
-; CHECK-NEXT:    i32.or $push34=, $pop33, $pop29
-; CHECK-NEXT:    i32.or $push39=, $pop38, $pop34
-; CHECK-NEXT:    i32.or $push44=, $pop43, $pop39
-; CHECK-NEXT:    i32.or $push49=, $pop48, $pop44
-; CHECK-NEXT:    i32.or $push54=, $pop53, $pop49
-; CHECK-NEXT:    i32.or $push59=, $pop58, $pop54
-; CHECK-NEXT:    i32.or $push64=, $pop63, $pop59
-; CHECK-NEXT:    i32.or $push69=, $pop68, $pop64
-; CHECK-NEXT:    i32.or $push74=, $pop73, $pop69
-; CHECK-NEXT:    i32.or $push78=, $pop77, $pop74
-; CHECK-NEXT:    i32.or $push156=, $pop155, $pop78
-; CHECK-NEXT:    return $pop156
+; CHECK-NEXT:    i8x16.eq $push5=, $1, $2
+; CHECK-NEXT:    i8x16.bitmask $push6=, $pop5
+; CHECK-NEXT:    i32.add $push7=, $pop4, $pop6
+; CHECK-NEXT:    return $pop7
     %z = icmp eq <32 x i8> %x, splat (i8 32)
     %res = bitcast <32 x i1> %z to i32
     ret i32 %res
@@ -229,399 +44,32 @@ define i64 @optimize_illegal_bitcast_v64i8(<64 x i8> %x) {
 ; CHECK-LABEL: optimize_illegal_bitcast_v64i8:
 ; CHECK:         .functype optimize_illegal_bitcast_v64i8 (v128, v128, v128, v128) -> (i64)
 ; CHECK-NEXT:  # %bb.0:
-; CHECK-NEXT:    global.get $push287=, __stack_pointer
-; CHECK-NEXT:    i64.const $push288=, 16
-; CHECK-NEXT:    i64.sub $drop=, $pop287, $pop288
-; CHECK-NEXT:    v128.const $push390=, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64
-; CHECK-NEXT:    local.tee $push389=, $4=, $pop390
-; CHECK-NEXT:    i8x16.eq $push388=, $0, $pop389
-; CHECK-NEXT:    local.tee $push387=, $0=, $pop388
-; CHECK-NEXT:    i8x16.extract_lane_u $push222=, $pop387, 0
-; CHECK-NEXT:    i32.const $push1=, 1
-; CHECK-NEXT:    i32.and $push223=, $pop222, $pop1
-; CHECK-NEXT:    i8x16.extract_lane_u $push224=, $0, 1
-; CHECK-NEXT:    i32.const $push386=, 1
-; CHECK-NEXT:    i32.and $push225=, $pop224, $pop386
-; CHECK-NEXT:    i32.const $push385=, 1
-; CHECK-NEXT:    i32.shl $push226=, $pop225, $pop385
-; CHECK-NEXT:    i32.or $push227=, $pop223, $pop226
-; CHECK-NEXT:    i8x16.extract_lane_u $push228=, $0, 2
-; CHECK-NEXT:    i32.const $push384=, 1
-; CHECK-NEXT:    i32.and $push229=, $pop228, $pop384
-; CHECK-NEXT:    i32.const $push87=, 2
-; CHECK-NEXT:    i32.shl $push230=, $pop229, $pop87
-; CHECK-NEXT:    i32.or $push231=, $pop227, $pop230
-; CHECK-NEXT:    i8x16.extract_lane_u $push232=, $0, 3
-; CHECK-NEXT:    i32.const $push383=, 1
-; CHECK-NEXT:    i32.and $push233=, $pop232, $pop383
-; CHECK-NEXT:    i32.const $push92=, 3
-; CHECK-NEXT:    i32.shl $push234=, $pop233, $pop92
-; CHECK-NEXT:    i32.or $push235=, $pop231, $pop234
-; CHECK-NEXT:    i8x16.extract_lane_u $push236=, $0, 4
-; CHECK-NEXT:    i32.const $push382=, 1
-; CHECK-NEXT:    i32.and $push237=, $pop236, $pop382
-; CHECK-NEXT:    i32.const $push97=, 4
-; CHECK-NEXT:    i32.shl $push238=, $pop237, $pop97
-; CHECK-NEXT:    i32.or $push239=, $pop235, $pop238
-; CHECK-NEXT:    i8x16.extract_lane_u $push240=, $0, 5
-; CHECK-NEXT:    i32.const $push381=, 1
-; CHECK-NEXT:    i32.and $push241=, $pop240, $pop381
-; CHECK-NEXT:    i32.const $push102=, 5
-; CHECK-NEXT:    i32.shl $push242=, $pop241, $pop102
-; CHECK-NEXT:    i32.or $push243=, $pop239, $pop242
-; CHECK-NEXT:    i8x16.extract_lane_u $push244=, $0, 6
-; CHECK-NEXT:    i32.const $push380=, 1
-; CHECK-NEXT:    i32.and $push245=, $pop244, $pop380
-; CHECK-NEXT:    i32.const $push107=, 6
-; CHECK-NEXT:    i32.shl $push246=, $pop245, $pop107
-; CHECK-NEXT:    i32.or $push247=, $pop243, $pop246
-; CHECK-NEXT:    i8x16.extract_lane_u $push248=, $0, 7
-; CHECK-NEXT:    i32.const $push379=, 1
-; CHECK-NEXT:    i32.and $push249=, $pop248, $pop379
-; CHECK-NEXT:    i32.const $push112=, 7
-; CHECK-NEXT:    i32.shl $push250=, $pop249, $pop112
-; CHECK-NEXT:    i32.or $push251=, $pop247, $pop250
-; CHECK-NEXT:    i8x16.extract_lane_u $push252=, $0, 8
-; CHECK-NEXT:    i32.const $push378=, 1
-; CHECK-NEXT:    i32.and $push253=, $pop252, $pop378
-; CHECK-NEXT:    i32.const $push117=, 8
-; CHECK-NEXT:    i32.shl $push254=, $pop253, $pop117
-; CHECK-NEXT:    i32.or $push255=, $pop251, $pop254
-; CHECK-NEXT:    i8x16.extract_lane_u $push256=, $0, 9
-; CHECK-NEXT:    i32.const $push377=, 1
-; CHECK-NEXT:    i32.and $push257=, $pop256, $pop377
-; CHECK-NEXT:    i32.const $push122=, 9
-; CHECK-NEXT:    i32.shl $push258=, $pop257, $pop122
-; CHECK-NEXT:    i32.or $push259=, $pop255, $pop258
-; CHECK-NEXT:    i8x16.extract_lane_u $push260=, $0, 10
-; CHECK-NEXT:    i32.const $push376=, 1
-; CHECK-NEXT:    i32.and $push261=, $pop260, $pop376
-; CHECK-NEXT:    i32.const $push127=, 10
-; CHECK-NEXT:    i32.shl $push262=, $pop261, $pop127
-; CHECK-NEXT:    i32.or $push263=, $pop259, $pop262
-; CHECK-NEXT:    i8x16.extract_lane_u $push264=, $0, 11
-; CHECK-NEXT:    i32.const $push375=, 1
-; CHECK-NEXT:    i32.and $push265=, $pop264, $pop375
-; CHECK-NEXT:    i32.const $push132=, 11
-; CHECK-NEXT:    i32.shl $push266=, $pop265, $pop132
-; CHECK-NEXT:    i32.or $push267=, $pop263, $pop266
-; CHECK-NEXT:    i8x16.extract_lane_u $push268=, $0, 12
-; CHECK-NEXT:    i32.const $push374=, 1
-; CHECK-NEXT:    i32.and $push269=, $pop268, $pop374
-; CHECK-NEXT:    i32.const $push137=, 12
-; CHECK-NEXT:    i32.shl $push270=, $pop269, $pop137
-; CHECK-NEXT:    i32.or $push271=, $pop267, $pop270
-; CHECK-NEXT:    i8x16.extract_lane_u $push272=, $0, 13
-; CHECK-NEXT:    i32.const $push373=, 1
-; CHECK-NEXT:    i32.and $push273=, $pop272, $pop373
-; CHECK-NEXT:    i32.const $push142=, 13
-; CHECK-NEXT:    i32.shl $push274=, $pop273, $pop142
-; CHECK-NEXT:    i32.or $push275=, $pop271, $pop274
-; CHECK-NEXT:    i8x16.extract_lane_u $push276=, $0, 14
-; CHECK-NEXT:    i32.const $push372=, 1
-; CHECK-NEXT:    i32.and $push277=, $pop276, $pop372
-; CHECK-NEXT:    i32.const $push147=, 14
-; CHECK-NEXT:    i32.shl $push278=, $pop277, $pop147
-; CHECK-NEXT:    i32.or $push279=, $pop275, $pop278
-; CHECK-NEXT:    i8x16.extract_lane_u $push280=, $0, 15
-; CHECK-NEXT:    i32.const $push151=, 15
-; CHECK-NEXT:    i32.shl $push281=, $pop280, $pop151
-; CHECK-NEXT:    i32.or $push282=, $pop279, $pop281
-; CHECK-NEXT:    i32.const $push154=, 65535
-; CHECK-NEXT:    i32.and $push283=, $pop282, $pop154
-; CHECK-NEXT:    i8x16.eq $push371=, $1, $4
-; CHECK-NEXT:    local.tee $push370=, $0=, $pop371
-; CHECK-NEXT:    i8x16.extract_lane_u $push219=, $pop370, 15
-; CHECK-NEXT:    i32.const $push76=, 31
-; CHECK-NEXT:    i32.shl $push220=, $pop219, $pop76
-; CHECK-NEXT:    i8x16.extract_lane_u $push215=, $0, 14
-; CHECK-NEXT:    i32.const $push369=, 1
-; CHECK-NEXT:    i32.and $push216=, $pop215, $pop369
-; CHECK-NEXT:    i32.const $push72=, 30
-; CHECK-NEXT:    i32.shl $push217=, $pop216, $pop72
-; CHECK-NEXT:    i8x16.extract_lane_u $push211=, $0, 13
-; CHECK-NEXT:    i32.const $push368=, 1
-; CHECK-NEXT:    i32.and $push212=, $pop211, $pop368
-; CHECK-NEXT:    i32.const $push67=, 29
-; CHECK-NEXT:    i32.shl $push213=, $pop212, $pop67
-; CHECK-NEXT:    i8x16.extract_lane_u $push207=, $0, 12
-; CHECK-NEXT:    i32.const $push367=, 1
-; CHECK-NEXT:    i32.and $push208=, $pop207, $pop367
-; CHECK-NEXT:    i32.const $push62=, 28
-; CHECK-NEXT:    i32.shl $push209=, $pop208, $pop62
-; CHECK-NEXT:    i8x16.extract_lane_u $push203=, $0, 11
-; CHECK-NEXT:    i32.const $push366=, 1
-; CHECK-NEXT:    i32.and $push204=, $pop203, $pop366
-; CHECK-NEXT:    i32.const $push57=, 27
-; CHECK-NEXT:    i32.shl $push205=, $pop204, $pop57
-; CHECK-NEXT:    i8x16.extract_lane_u $push199=, $0, 10
-; CHECK-NEXT:    i32.const $push365=, 1
-; CHECK-NEXT:    i32.and $push200=, $pop199, $pop365
-; CHECK-NEXT:    i32.const $push52=, 26
-; CHECK-NEXT:    i32.shl $push201=, $pop200, $pop52
-; CHECK-NEXT:    i8x16.extract_lane_u $push195=, $0, 9
-; CHECK-NEXT:    i32.const $push364=, 1
-; CHECK-NEXT:    i32.and $push196=, $pop195, $pop364
-; CHECK-NEXT:    i32.const $push47=, 25
-; CHECK-NEXT:    i32.shl $push197=, $pop196, $pop47
-; CHECK-NEXT:    i8x16.extract_lane_u $push191=, $0, 8
-; CHECK-NEXT:    i32.const $push363=, 1
-; CHECK-NEXT:    i32.and $push192=, $pop191, $pop363
-; CHECK-NEXT:    i32.const $push42=, 24
-; CHECK-NEXT:    i32.shl $push193=, $pop192, $pop42
-; CHECK-NEXT:    i8x16.extract_lane_u $push187=, $0, 7
-; CHECK-NEXT:    i32.const $push362=, 1
-; CHECK-NEXT:    i32.and $push188=, $pop187, $pop362
-; CHECK-NEXT:    i32.const $push37=, 23
-; CHECK-NEXT:    i32.shl $push189=, $pop188, $pop37
-; CHECK-NEXT:    i8x16.extract_lane_u $push183=, $0, 6
-; CHECK-NEXT:    i32.const $push361=, 1
-; CHECK-NEXT:    i32.and $push184=, $pop183, $pop361
-; CHECK-NEXT:    i32.const $push32=, 22
-; CHECK-NEXT:    i32.shl $push185=, $pop184, $pop32
-; CHECK-NEXT:    i8x16.extract_lane_u $push179=, $0, 5
-; CHECK-NEXT:    i32.const $push360=, 1
-; CHECK-NEXT:    i32.and $push180=, $pop179, $pop360
-; CHECK-NEXT:    i32.const $push27=, 21
-; CHECK-NEXT:    i32.shl $push181=, $pop180, $pop27
-; CHECK-NEXT:    i8x16.extract_lane_u $push175=, $0, 4
-; CHECK-NEXT:    i32.const $push359=, 1
-; CHECK-NEXT:    i32.and $push176=, $pop175, $pop359
-; CHECK-NEXT:    i32.const $push22=, 20
-; CHECK-NEXT:    i32.shl $push177=, $pop176, $pop22
-; CHECK-NEXT:    i8x16.extract_lane_u $push171=, $0, 3
-; CHECK-NEXT:    i32.const $push358=, 1
-; CHECK-NEXT:    i32.and $push172=, $pop171, $pop358
-; CHECK-NEXT:    i32.const $push17=, 19
-; CHECK-NEXT:    i32.shl $push173=, $pop172, $pop17
-; CHECK-NEXT:    i8x16.extract_lane_u $push167=, $0, 2
-; CHECK-NEXT:    i32.const $push357=, 1
-; CHECK-NEXT:    i32.and $push168=, $pop167, $pop357
-; CHECK-NEXT:    i32.const $push12=, 18
-; CHECK-NEXT:    i32.shl $push169=, $pop168, $pop12
-; CHECK-NEXT:    i8x16.extract_lane_u $push163=, $0, 1
-; CHECK-NEXT:    i32.const $push356=, 1
-; CHECK-NEXT:    i32.and $push164=, $pop163, $pop356
-; CHECK-NEXT:    i32.const $push7=, 17
-; CHECK-NEXT:    i32.shl $push165=, $pop164, $pop7
-; CHECK-NEXT:    i8x16.extract_lane_u $push160=, $0, 0
-; CHECK-NEXT:    i32.const $push355=, 1
-; CHECK-NEXT:    i32.and $push161=, $pop160, $pop355
-; CHECK-NEXT:    i32.const $push3=, 16
-; CHECK-NEXT:    i32.shl $push162=, $pop161, $pop3
-; CHECK-NEXT:    i32.or $push166=, $pop165, $pop162
-; CHECK-NEXT:    i32.or $push170=, $pop169, $pop166
-; CHECK-NEXT:    i32.or $push174=, $pop173, $pop170
-; CHECK-NEXT:    i32.or $push178=, $pop177, $pop174
-; CHECK-NEXT:    i32.or $push182=, $pop181, $pop178
-; CHECK-NEXT:    i32.or $push186=, $pop185, $pop182
-; CHECK-NEXT:    i32.or $push190=, $pop189, $pop186
-; CHECK-NEXT:    i32.or $push194=, $pop193, $pop190
-; CHECK-NEXT:    i32.or $push198=, $pop197, $pop194
-; CHECK-NEXT:    i32.or $push202=, $pop201, $pop198
-; CHECK-NEXT:    i32.or $push206=, $pop205, $pop202
-; CHECK-NEXT:    i32.or $push210=, $pop209, $pop206
-; CHECK-NEXT:    i32.or $push214=, $pop213, $pop210
-; CHECK-NEXT:    i32.or $push218=, $pop217, $pop214
-; CHECK-NEXT:    i32.or $push221=, $pop220, $pop218
-; CHECK-NEXT:    i32.or $push284=, $pop283, $pop221
-; CHECK-NEXT:    i64.extend_i32_u $push285=, $pop284
-; CHECK-NEXT:    i8x16.eq $push354=, $2, $4
-; CHECK-NEXT:    local.tee $push353=, $0=, $pop354
-; CHECK-NEXT:    i8x16.extract_lane_u $push79=, $pop353, 0
-; CHECK-NEXT:    i32.const $push352=, 1
-; CHECK-NEXT:    i32.and $push80=, $pop79, $pop352
-; CHECK-NEXT:    i8x16.extract_lane_u $push81=, $0, 1
-; CHECK-NEXT:    i32.const $push351=, 1
-; CHECK-NEXT:    i32.and $push82=, $pop81, $pop351
-; CHECK-NEXT:    i32.const $push350=, 1
-; CHECK-NEXT:    i32.shl $push83=, $pop82, $pop350
-; CHECK-NEXT:    i32.or $push84=, $pop80, $pop83
-; CHECK-NEXT:    i8x16.extract_lane_u $push85=, $0, 2
-; CHECK-NEXT:    i32.const $push349=, 1
-; CHECK-NEXT:    i32.and $push86=, $pop85, $pop349
-; CHECK-NEXT:    i32.const $push348=, 2
-; CHECK-NEXT:    i32.shl $push88=, $pop86, $pop348
-; CHECK-NEXT:    i32.or $push89=, $pop84, $pop88
-; CHECK-NEXT:    i8x16.extract_lane_u $push90=, $0, 3
-; CHECK-NEXT:    i32.const $push347=, 1
-; CHECK-NEXT:    i32.and $push91=, $pop90, $pop347
-; CHECK-NEXT:    i32.const $push346=, 3
-; CHECK-NEXT:    i32.shl $push93=, $pop91, $pop346
-; CHECK-NEXT:    i32.or $push94=, $pop89, $pop93
-; CHECK-NEXT:    i8x16.extract_lane_u $push95=, $0, 4
-; CHECK-NEXT:    i32.const $push345=, 1
-; CHECK-NEXT:    i32.and $push96=, $pop95, $pop345
-; CHECK-NEXT:    i32.const $push344=, 4
-; CHECK-NEXT:    i32.shl $push98=, $pop96, $pop344
-; CHECK-NEXT:    i32.or $push99=, $pop94, $pop98
-; CHECK-NEXT:    i8x16.extract_lane_u $push100=, $0, 5
-; CHECK-NEXT:    i32.const $push343=, 1
-; CHECK-NEXT:    i32.and $push101=, $pop100, $pop343
-; CHECK-NEXT:    i32.const $push342=, 5
-; CHECK-NEXT:    i32.shl $push103=, $pop101, $pop342
-; CHECK-NEXT:    i32.or $push104=, $pop99, $pop103
-; CHECK-NEXT:    i8x16.extract_lane_u $push105=, $0, 6
-; CHECK-NEXT:    i32.const $push341=, 1
-; CHECK-NEXT:    i32.and $push106=, $pop105, $pop341
-; CHECK-NEXT:    i32.const $push340=, 6
-; CHECK-NEXT:    i32.shl $push108=, $pop106, $pop340
-; CHECK-NEXT:    i32.or $push109=, $pop104, $pop108
-; CHECK-NEXT:    i8x16.extract_lane_u $push110=, $0, 7
-; CHECK-NEXT:    i32.const $push339=, 1
-; CHECK-NEXT:    i32.and $push111=, $pop110, $pop339
-; CHECK-NEXT:    i32.const $push338=, 7
-; CHECK-NEXT:    i32.shl $push113=, $pop111, $pop338
-; CHECK-NEXT:    i32.or $push114=, $pop109, $pop113
-; CHECK-NEXT:    i8x16.extract_lane_u $push115=, $0, 8
-; CHECK-NEXT:    i32.const $push337=, 1
-; CHECK-NEXT:    i32.and $push116=, $pop115, $pop337
-; CHECK-NEXT:    i32.const $push336=, 8
-; CHECK-NEXT:    i32.shl $push118=, $pop116, $pop336
-; CHECK-NEXT:    i32.or $push119=, $pop114, $pop118
-; CHECK-NEXT:    i8x16.extract_lane_u $push120=, $0, 9
-; CHECK-NEXT:    i32.const $push335=, 1
-; CHECK-NEXT:    i32.and $push121=, $pop120, $pop335
-; CHECK-NEXT:    i32.const $push334=, 9
-; CHECK-NEXT:    i32.shl $push123=, $pop121, $pop334
-; CHECK-NEXT:    i32.or $push124=, $pop119, $pop123
-; CHECK-NEXT:    i8x16.extract_lane_u $push125=, $0, 10
-; CHECK-NEXT:    i32.const $push333=, 1
-; CHECK-NEXT:    i32.and $push126=, $pop125, $pop333
-; CHECK-NEXT:    i32.const $push332=, 10
-; CHECK-NEXT:    i32.shl $push128=, $pop126, $pop332
-; CHECK-NEXT:    i32.or $push129=, $pop124, $pop128
-; CHECK-NEXT:    i8x16.extract_lane_u $push130=, $0, 11
-; CHECK-NEXT:    i32.const $push331=, 1
-; CHECK-NEXT:    i32.and $push131=, $pop130, $pop331
-; CHECK-NEXT:    i32.const $push330=, 11
-; CHECK-NEXT:    i32.shl $push133=, $pop131, $pop330
-; CHECK-NEXT:    i32.or $push134=, $pop129, $pop133
-; CHECK-NEXT:    i8x16.extract_lane_u $push135=, $0, 12
-; CHECK-NEXT:    i32.const $push329=, 1
-; CHECK-NEXT:    i32.and $push136=, $pop135, $pop329
-; CHECK-NEXT:    i32.const $push328=, 12
-; CHECK-NEXT:    i32.shl $push138=, $pop136, $pop328
-; CHECK-NEXT:    i32.or $push139=, $pop134, $pop138
-; CHECK-NEXT:    i8x16.extract_lane_u $push140=, $0, 13
-; CHECK-NEXT:    i32.const $push327=, 1
-; CHECK-NEXT:    i32.and $push141=, $pop140, $pop327
-; CHECK-NEXT:    i32.const $push326=, 13
-; CHECK-NEXT:    i32.shl $push143=, $pop141, $pop326
-; CHECK-NEXT:    i32.or $push144=, $pop139, $pop143
-; CHECK-NEXT:    i8x16.extract_lane_u $push145=, $0, 14
-; CHECK-NEXT:    i32.const $push325=, 1
-; CHECK-NEXT:    i32.and $push146=, $pop145, $pop325
-; CHECK-NEXT:    i32.const $push324=, 14
-; CHECK-NEXT:    i32.shl $push148=, $pop146, $pop324
-; CHECK-NEXT:    i32.or $push149=, $pop144, $pop148
-; CHECK-NEXT:    i8x16.extract_lane_u $push150=, $0, 15
-; CHECK-NEXT:    i32.const $push323=, 15
-; CHECK-NEXT:    i32.shl $push152=, $pop150, $pop323
-; CHECK-NEXT:    i32.or $push153=, $pop149, $pop152
-; CHECK-NEXT:    i32.const $push322=, 65535
-; CHECK-NEXT:    i32.and $push155=, $pop153, $pop322
-; CHECK-NEXT:    i8x16.eq $push321=, $3, $4
-; CHECK-NEXT:    local.tee $push320=, $0=, $pop321
-; CHECK-NEXT:    i8x16.extract_lane_u $push75=, $pop320, 15
-; CHECK-NEXT:    i32.const $push319=, 31
-; CHECK-NEXT:    i32.shl $push77=, $pop75, $pop319
-; CHECK-NEXT:    i8x16.extract_lane_u $push70=, $0, 14
-; CHECK-NEXT:    i32.const $push318=, 1
-; CHECK-NEXT:    i32.and $push71=, $pop70, $pop318
-; CHECK-NEXT:    i32.const $push317=, 30
-; CHECK-NEXT:    i32.shl $push73=, $pop71, $pop317
-; CHECK-NEXT:    i8x16.extract_lane_u $push65=, $0, 13
-; CHECK-NEXT:    i32.const $push316=, 1
-; CHECK-NEXT:    i32.and $push66=, $pop65, $pop316
-; CHECK-NEXT:    i32.const $push315=, 29
-; CHECK-NEXT:    i32.shl $push68=, $pop66, $pop315
-; CHECK-NEXT:    i8x16.extract_lane_u $push60=, $0, 12
-; CHECK-NEXT:    i32.const $push314=, 1
-; CHECK-NEXT:    i32.and $push61=, $pop60, $pop314
-; CHECK-NEXT:    i32.const $push313=, 28
-; CHECK-NEXT:    i32.shl $push63=, $pop61, $pop313
-; CHECK-NEXT:    i8x16.extract_lane_u $push55=, $0, 11
-; CHECK-NEXT:    i32.const $push312=, 1
-; CHECK-NEXT:    i32.and $push56=, $pop55, $pop312
-; CHECK-NEXT:    i32.const $push311=, 27
-; CHECK-NEXT:    i32.shl $push58=, $pop56, $pop311
-; CHECK-NEXT:    i8x16.extract_lane_u $push50=, $0, 10
-; CHECK-NEXT:    i32.const $push310=, 1
-; CHECK-NEXT:    i32.and $push51=, $pop50, $pop310
-; CHECK-NEXT:    i32.const $push309=, 26
-; CHECK-NEXT:    i32.shl $push53=, $pop51, $pop309
-; CHECK-NEXT:    i8x16.extract_lane_u $push45=, $0, 9
-; CHECK-NEXT:    i32.const $push308=, 1
-; CHECK-NEXT:    i32.and $push46=, $pop45, $pop308
-; CHECK-NEXT:    i32.const $push307=, 25
-; CHECK-NEXT:    i32.shl $push48=, $pop46, $pop307
-; CHECK-NEXT:    i8x16.extract_lane_u $push40=, $0, 8
-; CHECK-NEXT:    i32.const $push306=, 1
-; CHECK-NEXT:    i32.and $push41=, $pop40, $pop306
-; CHECK-NEXT:    i32.const $push305=, 24
-; CHECK-NEXT:    i32.shl $push43=, $pop41, $pop305
-; CHECK-NEXT:    i8x16.extract_lane_u $push35=, $0, 7
-; CHECK-NEXT:    i32.const $push304=, 1
-; CHECK-NEXT:    i32.and $push36=, $pop35, $pop304
-; CHECK-NEXT:    i32.const $push303=, 23
-; CHECK-NEXT:    i32.shl $push38=, $pop36, $pop303
-; CHECK-NEXT:    i8x16.extract_lane_u $push30=, $0, 6
-; CHECK-NEXT:    i32.const $push302=, 1
-; CHECK-NEXT:    i32.and $push31=, $pop30, $pop302
-; CHECK-NEXT:    i32.const $push301=, 22
-; CHECK-NEXT:    i32.shl $push33=, $pop31, $pop301
-; CHECK-NEXT:    i8x16.extract_lane_u $push25=, $0, 5
-; CHECK-NEXT:    i32.const $push300=, 1
-; CHECK-NEXT:    i32.and $push26=, $pop25, $pop300
-; CHECK-NEXT:    i32.const $push299=, 21
-; CHECK-NEXT:    i32.shl $push28=, $pop26, $pop299
-; CHECK-NEXT:    i8x16.extract_lane_u $push20=, $0, 4
-; CHECK-NEXT:    i32.const $push298=, 1
-; CHECK-NEXT:    i32.and $push21=, $pop20, $pop298
-; CHECK-NEXT:    i32.const $push297=, 20
-; CHECK-NEXT:    i32.shl $push23=, $pop21, $pop297
-; CHECK-NEXT:    i8x16.extract_lane_u $push15=, $0, 3
-; CHECK-NEXT:    i32.const $push296=, 1
-; CHECK-NEXT:    i32.and $push16=, $pop15, $pop296
-; CHECK-NEXT:    i32.const $push295=, 19
-; CHECK-NEXT:    i32.shl $push18=, $pop16, $pop295
-; CHECK-NEXT:    i8x16.extract_lane_u $push10=, $0, 2
-; CHECK-NEXT:    i32.const $push294=, 1
-; CHECK-NEXT:    i32.and $push11=, $pop10, $pop294
-; CHECK-NEXT:    i32.const $push293=, 18
-; CHECK-NEXT:    i32.shl $push13=, $pop11, $pop293
-; CHECK-NEXT:    i8x16.extract_lane_u $push5=, $0, 1
-; CHECK-NEXT:    i32.const $push292=, 1
-; CHECK-NEXT:    i32.and $push6=, $pop5, $pop292
-; CHECK-NEXT:    i32.const $push291=, 17
-; CHECK-NEXT:    i32.shl $push8=, $pop6, $pop291
-; CHECK-NEXT:    i8x16.extract_lane_u $push0=, $0, 0
-; CHECK-NEXT:    i32.const $push290=, 1
-; CHECK-NEXT:    i32.and $push2=, $pop0, $pop290
-; CHECK-NEXT:    i32.const $push289=, 16
-; CHECK-NEXT:    i32.shl $push4=, $pop2, $pop289
-; CHECK-NEXT:    i32.or $push9=, $pop8, $pop4
-; CHECK-NEXT:    i32.or $push14=, $pop13, $pop9
-; CHECK-NEXT:    i32.or $push19=, $pop18, $pop14
-; CHECK-NEXT:    i32.or $push24=, $pop23, $pop19
-; CHECK-NEXT:    i32.or $push29=, $pop28, $pop24
-; CHECK-NEXT:    i32.or $push34=, $pop33, $pop29
-; CHECK-NEXT:    i32.or $push39=, $pop38, $pop34
-; CHECK-NEXT:    i32.or $push44=, $pop43, $pop39
-; CHECK-NEXT:    i32.or $push49=, $pop48, $pop44
-; CHECK-NEXT:    i32.or $push54=, $pop53, $pop49
-; CHECK-NEXT:    i32.or $push59=, $pop58, $pop54
-; CHECK-NEXT:    i32.or $push64=, $pop63, $pop59
-; CHECK-NEXT:    i32.or $push69=, $pop68, $pop64
-; CHECK-NEXT:    i32.or $push74=, $pop73, $pop69
-; CHECK-NEXT:    i32.or $push78=, $pop77, $pop74
-; CHECK-NEXT:    i32.or $push156=, $pop155, $pop78
-; CHECK-NEXT:    i64.extend_i32_u $push157=, $pop156
-; CHECK-NEXT:    i64.const $push158=, 32
-; CHECK-NEXT:    i64.shl $push159=, $pop157, $pop158
-; CHECK-NEXT:    i64.or $push286=, $pop285, $pop159
-; CHECK-NEXT:    return $pop286
+; CHECK-NEXT:    i64.const $push3=, 16
+; CHECK-NEXT:    i64.const $push24=, 16
+; CHECK-NEXT:    i64.const $push23=, 16
+; CHECK-NEXT:    v128.const $push22=, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64
+; CHECK-NEXT:    local.tee $push21=, $4=, $pop22
+; CHECK-NEXT:    i8x16.eq $push0=, $0, $pop21
+; CHECK-NEXT:    i8x16.bitmask $push1=, $pop0
+; CHECK-NEXT:    i64.extend_i32_u $push2=, $pop1
+; CHECK-NEXT:    i64.const $push20=, 16
+; CHECK-NEXT:    i64.add $push4=, $pop2, $pop20
+; CHECK-NEXT:    i64.shl $push5=, $pop23, $pop4
+; CHECK-NEXT:    i8x16.eq $push6=, $1, $4
+; CHECK-NEXT:    i8x16.bitmask $push7=, $pop6
+; CHECK-NEXT:    i64.extend_i32_u $push8=, $pop7
+; CHECK-NEXT:    i64.add $push9=, $pop5, $pop8
+; CHECK-NEXT:    i64.shl $push10=, $pop24, $pop9
+; CHECK-NEXT:    i8x16.eq $push11=, $2, $4
+; CHECK-NEXT:    i8x16.bitmask $push12=, $pop11
+; CHECK-NEXT:    i64.extend_i32_u $push13=, $pop12
+; CHECK-NEXT:    i64.add $push14=, $pop10, $pop13
+; CHECK-NEXT:    i64.shl $push15=, $pop3, $pop14
+; CHECK-NEXT:    i8x16.eq $push16=, $3, $4
+; CHECK-NEXT:    i8x16.bitmask $push17=, $pop16
+; CHECK-NEXT:    i64.extend_i32_u $push18=, $pop17
+; CHECK-NEXT:    i64.add $push19=, $pop15, $pop18
+; CHECK-NEXT:    return $pop19
     %z = icmp eq <64 x i8> %x, splat (i8 64)
     %res = bitcast <64 x i1> %z to i64
     ret i64 %res



More information about the llvm-commits mailing list