[llvm] [WebAssembly] vf32 to vi8, vi16 lowering (PR #164644)
Sam Parker via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 23 05:43:30 PDT 2025
https://github.com/sparker-arm updated https://github.com/llvm/llvm-project/pull/164644
>From a02ac5d4f5fa41cd7f54b0f28aa63bae58bb3a62 Mon Sep 17 00:00:00 2001
From: Sam Parker <sam.parker at arm.com>
Date: Wed, 22 Oct 2025 16:10:38 +0100
Subject: [PATCH 1/5] [WebAssembly] v4f32 to v4i8 lowering
Avoid scalarizing the conversion and use trunc_sat and narrow
instead.
---
.../WebAssembly/WebAssemblyISelLowering.cpp | 29 +-
.../CodeGen/WebAssembly/memory-interleave.ll | 912 ++++++++----------
2 files changed, 442 insertions(+), 499 deletions(-)
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 7ec463bdc3b84..2410aab4aa660 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -216,7 +216,8 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
// Combine fp_to_{s,u}int_sat or fp_round of concat_vectors or vice versa
// into conversion ops
setTargetDAGCombine({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT,
- ISD::FP_ROUND, ISD::CONCAT_VECTORS});
+ ISD::FP_TO_SINT, ISD::FP_TO_UINT, ISD::FP_ROUND,
+ ISD::CONCAT_VECTORS});
setTargetDAGCombine(ISD::TRUNCATE);
@@ -3597,6 +3598,29 @@ static SDValue performMulCombine(SDNode *N,
}
}
+SDValue performConvertFPCombine(SDNode *N, SelectionDAG &DAG) {
+ SDLoc DL(N);
+ EVT OutVT = N->getValueType(0);
+ if (N->getValueType(0) == MVT::v4i8) {
+ // v4f32 -> v4i32
+ EVT InVT = MVT::v4i32;
+ SDValue ToInt = DAG.getNode(N->getOpcode(), DL, InVT, N->getOperand(0));
+ APInt Mask = APInt::getLowBitsSet(InVT.getScalarSizeInBits(),
+ OutVT.getScalarSizeInBits());
+ SDValue Masked =
+ DAG.getNode(ISD::AND, DL, InVT, ToInt, DAG.getConstant(Mask, DL, InVT));
+ SDValue BigFakeVector =
+ DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i32,
+ DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i32, Masked,
+ DAG.getUNDEF(MVT::v4i32)),
+ DAG.getUNDEF(MVT::v8i32));
+ SDValue Trunc =
+ truncateVectorWithNARROW(MVT::v16i8, BigFakeVector, DL, DAG);
+ return DAG.getBitcast(OutVT, extractSubVector(Trunc, 0, DAG, DL, 32));
+ }
+ return SDValue();
+}
+
SDValue
WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
@@ -3623,6 +3647,9 @@ WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::FP_ROUND:
case ISD::CONCAT_VECTORS:
return performVectorTruncZeroCombine(N, DCI);
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ return performConvertFPCombine(N, DCI.DAG);
case ISD::TRUNCATE:
return performTruncateCombine(N, DCI);
case ISD::INTRINSIC_WO_CHAIN:
diff --git a/llvm/test/CodeGen/WebAssembly/memory-interleave.ll b/llvm/test/CodeGen/WebAssembly/memory-interleave.ll
index 104ec31e13d3e..4097710cdee52 100644
--- a/llvm/test/CodeGen/WebAssembly/memory-interleave.ll
+++ b/llvm/test/CodeGen/WebAssembly/memory-interleave.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple=wasm32 -mattr=+simd128 -passes=loop-vectorize %s | llc -mtriple=wasm32 -mattr=+simd128 -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-keep-registers | FileCheck %s
+; RUN: opt -mtriple=wasm32 -mattr=+simd128 -passes=loop-vectorize %s | llc -mtriple=wasm32 -mattr=+simd128 -asm-verbose=false -disable-wasm-fallthrough-return-opt | FileCheck %s
target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20"
@@ -20,17 +20,17 @@ target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20
; CHECK: loop
; CHECK: v128.load
; CHECK: v128.load
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
; CHECK: v128.load
; CHECK: v128.load
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
; CHECK: i32x4.add
-; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31
-; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31
; CHECK: i32x4.add
-; CHECK: i8x16.shuffle {{.*}} 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31
; CHECK: v128.store
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23
; CHECK: v128.store
define hidden void @two_ints_same_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) {
%5 = icmp eq i32 %3, 0
@@ -64,17 +64,17 @@ define hidden void @two_ints_same_op(ptr noalias nocapture noundef writeonly %0,
; CHECK: loop
; CHECK: v128.load
; CHECK: v128.load
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
; CHECK: v128.load
; CHECK: v128.load
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
; CHECK: i32x4.add
-; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31
-; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31
; CHECK: i32x4.sub
-; CHECK: i8x16.shuffle {{.*}} 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31
; CHECK: v128.store
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23
; CHECK: v128.store
define hidden void @two_ints_vary_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) {
%5 = icmp eq i32 %3, 0
@@ -208,27 +208,27 @@ define hidden void @three_shorts(ptr noalias nocapture noundef writeonly %0, ptr
; CHECK: loop
; CHECK: v128.load
; CHECK: v128.load
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 8, 9, 16, 17, 24, 25, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i8x16.shuffle 0, 1, 8, 9, 16, 17, 24, 25, 0, 1, 0, 1, 0, 1, 0, 1
; CHECK: v128.load
; CHECK: v128.load
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 8, 9, 16, 17, 24, 25, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i8x16.shuffle 0, 1, 8, 9, 16, 17, 24, 25, 0, 1, 0, 1, 0, 1, 0, 1
; CHECK: i16x8.sub
-; CHECK: i8x16.shuffle {{.*}} 2, 3, 10, 11, 18, 19, 26, 27, 0, 1, 0, 1, 0, 1, 0, 1
-; CHECK: i8x16.shuffle {{.*}} 2, 3, 10, 11, 18, 19, 26, 27, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i8x16.shuffle 2, 3, 10, 11, 18, 19, 26, 27, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i8x16.shuffle 2, 3, 10, 11, 18, 19, 26, 27, 0, 1, 0, 1, 0, 1, 0, 1
; CHECK: i16x8.sub
-; CHECK: i8x16.shuffle {{.*}} 4, 5, 20, 21, 0, 1, 0, 1, 6, 7, 22, 23, 0, 1, 0, 1
-; CHECK: i8x16.shuffle {{.*}} 4, 5, 12, 13, 20, 21, 28, 29, 0, 1, 0, 1, 0, 1, 0, 1
-; CHECK: i8x16.shuffle {{.*}} 4, 5, 12, 13, 20, 21, 28, 29, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i8x16.shuffle 4, 5, 20, 21, 0, 1, 0, 1, 6, 7, 22, 23, 0, 1, 0, 1
+; CHECK: i8x16.shuffle 4, 5, 12, 13, 20, 21, 28, 29, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i8x16.shuffle 4, 5, 12, 13, 20, 21, 28, 29, 0, 1, 0, 1, 0, 1, 0, 1
; CHECK: i16x8.sub
-; CHECK: i8x16.shuffle {{.*}} 6, 7, 14, 15, 22, 23, 30, 31, 0, 1, 0, 1, 0, 1, 0, 1
-; CHECK: i8x16.shuffle {{.*}} 6, 7, 14, 15, 22, 23, 30, 31, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i8x16.shuffle 6, 7, 14, 15, 22, 23, 30, 31, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i8x16.shuffle 6, 7, 14, 15, 22, 23, 30, 31, 0, 1, 0, 1, 0, 1, 0, 1
; CHECK: i16x8.sub
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 0, 1, 4, 5, 20, 21, 0, 1, 0, 1, 6, 7, 22, 23
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 20, 21, 22, 23, 8, 9, 10, 11, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 0, 1, 0, 1, 4, 5, 20, 21, 0, 1, 0, 1, 6, 7, 22, 23
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 20, 21, 22, 23, 8, 9, 10, 11, 28, 29, 30, 31
; CHECK: v128.store
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 16, 17, 0, 1, 0, 1, 2, 3, 18, 19, 0, 1, 0, 1
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 0, 1, 0, 1, 16, 17, 0, 1, 0, 1, 2, 3, 18, 19
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 20, 21, 22, 23, 8, 9, 10, 11, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 0, 1, 16, 17, 0, 1, 0, 1, 2, 3, 18, 19, 0, 1, 0, 1
+; CHECK: i8x16.shuffle 0, 1, 0, 1, 0, 1, 16, 17, 0, 1, 0, 1, 2, 3, 18, 19
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 20, 21, 22, 23, 8, 9, 10, 11, 28, 29, 30, 31
; CHECK: v128.store
define hidden void @four_shorts_same_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) {
%5 = icmp eq i32 %3, 0
@@ -276,27 +276,27 @@ define hidden void @four_shorts_same_op(ptr noalias nocapture noundef writeonly
; CHECK: loop
; CHECK: v128.load
; CHECK: v128.load
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 8, 9, 16, 17, 24, 25, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i8x16.shuffle 0, 1, 8, 9, 16, 17, 24, 25, 0, 1, 0, 1, 0, 1, 0, 1
; CHECK: v128.load
; CHECK: v128.load
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 8, 9, 16, 17, 24, 25, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i8x16.shuffle 0, 1, 8, 9, 16, 17, 24, 25, 0, 1, 0, 1, 0, 1, 0, 1
; CHECK: v128.or
-; CHECK: i8x16.shuffle {{.*}} 2, 3, 10, 11, 18, 19, 26, 27, 0, 1, 0, 1, 0, 1, 0, 1
-; CHECK: i8x16.shuffle {{.*}} 2, 3, 10, 11, 18, 19, 26, 27, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i8x16.shuffle 2, 3, 10, 11, 18, 19, 26, 27, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i8x16.shuffle 2, 3, 10, 11, 18, 19, 26, 27, 0, 1, 0, 1, 0, 1, 0, 1
; CHECK: v128.or
-; CHECK: i8x16.shuffle {{.*}} 4, 5, 20, 21, 0, 1, 0, 1, 6, 7, 22, 23, 0, 1, 0, 1
-; CHECK: i8x16.shuffle {{.*}} 4, 5, 12, 13, 20, 21, 28, 29, 0, 1, 0, 1, 0, 1, 0, 1
-; CHECK: i8x16.shuffle {{.*}} 4, 5, 12, 13, 20, 21, 28, 29, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i8x16.shuffle 4, 5, 20, 21, 0, 1, 0, 1, 6, 7, 22, 23, 0, 1, 0, 1
+; CHECK: i8x16.shuffle 4, 5, 12, 13, 20, 21, 28, 29, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i8x16.shuffle 4, 5, 12, 13, 20, 21, 28, 29, 0, 1, 0, 1, 0, 1, 0, 1
; CHECK: v128.xor
-; CHECK: i8x16.shuffle {{.*}} 6, 7, 14, 15, 22, 23, 30, 31, 0, 1, 0, 1, 0, 1, 0, 1
-; CHECK: i8x16.shuffle {{.*}} 6, 7, 14, 15, 22, 23, 30, 31, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i8x16.shuffle 6, 7, 14, 15, 22, 23, 30, 31, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i8x16.shuffle 6, 7, 14, 15, 22, 23, 30, 31, 0, 1, 0, 1, 0, 1, 0, 1
; CHECK: v128.xor
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 0, 1, 4, 5, 20, 21, 0, 1, 0, 1, 6, 7, 22, 23
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 20, 21, 22, 23, 8, 9, 10, 11, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 0, 1, 0, 1, 4, 5, 20, 21, 0, 1, 0, 1, 6, 7, 22, 23
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 20, 21, 22, 23, 8, 9, 10, 11, 28, 29, 30, 31
; CHECK: v128.store
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 16, 17, 0, 1, 0, 1, 2, 3, 18, 19, 0, 1, 0, 1
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 0, 1, 0, 1, 16, 17, 0, 1, 0, 1, 2, 3, 18, 19
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 20, 21, 22, 23, 8, 9, 10, 11, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 0, 1, 16, 17, 0, 1, 0, 1, 2, 3, 18, 19, 0, 1, 0, 1
+; CHECK: i8x16.shuffle 0, 1, 0, 1, 0, 1, 16, 17, 0, 1, 0, 1, 2, 3, 18, 19
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 20, 21, 22, 23, 8, 9, 10, 11, 28, 29, 30, 31
; CHECK: v128.store
define hidden void @four_shorts_split_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) {
%5 = icmp eq i32 %3, 0
@@ -343,27 +343,27 @@ define hidden void @four_shorts_split_op(ptr noalias nocapture noundef writeonly
; CHECK-LABEL: four_shorts_interleave_op:
; CHECK: v128.load
; CHECK: v128.load
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 8, 9, 16, 17, 24, 25, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i8x16.shuffle 0, 1, 8, 9, 16, 17, 24, 25, 0, 1, 0, 1, 0, 1, 0, 1
; CHECK: v128.load
; CHECK: v128.load
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 8, 9, 16, 17, 24, 25, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i8x16.shuffle 0, 1, 8, 9, 16, 17, 24, 25, 0, 1, 0, 1, 0, 1, 0, 1
; CHECK: v128.or
-; CHECK: i8x16.shuffle {{.*}} 2, 3, 10, 11, 18, 19, 26, 27, 0, 1, 0, 1, 0, 1, 0, 1
-; CHECK: i8x16.shuffle {{.*}} 2, 3, 10, 11, 18, 19, 26, 27, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i8x16.shuffle 2, 3, 10, 11, 18, 19, 26, 27, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i8x16.shuffle 2, 3, 10, 11, 18, 19, 26, 27, 0, 1, 0, 1, 0, 1, 0, 1
; CHECK: v128.xor
-; CHECK: i8x16.shuffle {{.*}} 4, 5, 20, 21, 0, 1, 0, 1, 6, 7, 22, 23, 0, 1, 0, 1
-; CHECK: i8x16.shuffle {{.*}} 4, 5, 12, 13, 20, 21, 28, 29, 0, 1, 0, 1, 0, 1, 0, 1
-; CHECK: i8x16.shuffle {{.*}} 4, 5, 12, 13, 20, 21, 28, 29, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i8x16.shuffle 4, 5, 20, 21, 0, 1, 0, 1, 6, 7, 22, 23, 0, 1, 0, 1
+; CHECK: i8x16.shuffle 4, 5, 12, 13, 20, 21, 28, 29, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i8x16.shuffle 4, 5, 12, 13, 20, 21, 28, 29, 0, 1, 0, 1, 0, 1, 0, 1
; CHECK: v128.or
-; CHECK: i8x16.shuffle {{.*}} 6, 7, 14, 15, 22, 23, 30, 31, 0, 1, 0, 1, 0, 1, 0, 1
-; CHECK: i8x16.shuffle {{.*}} 6, 7, 14, 15, 22, 23, 30, 31, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i8x16.shuffle 6, 7, 14, 15, 22, 23, 30, 31, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i8x16.shuffle 6, 7, 14, 15, 22, 23, 30, 31, 0, 1, 0, 1, 0, 1, 0, 1
; CHECK: v128.xor
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 0, 1, 4, 5, 20, 21, 0, 1, 0, 1, 6, 7, 22, 23
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 20, 21, 22, 23, 8, 9, 10, 11, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 0, 1, 0, 1, 4, 5, 20, 21, 0, 1, 0, 1, 6, 7, 22, 23
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 20, 21, 22, 23, 8, 9, 10, 11, 28, 29, 30, 31
; CHECK: v128.store
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 16, 17, 0, 1, 0, 1, 2, 3, 18, 19, 0, 1, 0, 1
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 0, 1, 0, 1, 16, 17, 0, 1, 0, 1, 2, 3, 18, 19
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 20, 21, 22, 23, 8, 9, 10, 11, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 0, 1, 16, 17, 0, 1, 0, 1, 2, 3, 18, 19, 0, 1, 0, 1
+; CHECK: i8x16.shuffle 0, 1, 0, 1, 0, 1, 16, 17, 0, 1, 0, 1, 2, 3, 18, 19
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 20, 21, 22, 23, 8, 9, 10, 11, 28, 29, 30, 31
; CHECK: v128.store
define hidden void @four_shorts_interleave_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) {
%5 = icmp eq i32 %3, 0
@@ -483,19 +483,19 @@ define hidden void @five_shorts(ptr noalias nocapture noundef writeonly %0, ptr
; CHECK: loop
; CHECK: v128.load
; CHECK: v128.load
-; CHECK: i8x16.shuffle {{.*}} 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
+; CHECK: i8x16.shuffle 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
; CHECK: v128.load
; CHECK: v128.load
-; CHECK: i8x16.shuffle {{.*}} 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
+; CHECK: i8x16.shuffle 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
; CHECK: i16x8.extmul_high_i8x16_u
-; CHECK: i8x16.shuffle {{.*}} 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31
-; CHECK: i8x16.shuffle {{.*}} 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31
+; CHECK: i8x16.shuffle 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31
+; CHECK: i8x16.shuffle 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31
; CHECK: i16x8.extmul_high_i8x16_u
-; CHECK: i8x16.shuffle {{.*}} 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30
+; CHECK: i8x16.shuffle 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30
; CHECK: v128.store
; CHECK: i16x8.extmul_low_i8x16_u
; CHECK: i16x8.extmul_low_i8x16_u
-; CHECK: i8x16.shuffle {{.*}} 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30
+; CHECK: i8x16.shuffle 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30
; CHECK: v128.store
define hidden void @two_bytes_same_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) {
%5 = icmp eq i32 %3, 0
@@ -529,18 +529,18 @@ define hidden void @two_bytes_same_op(ptr noalias nocapture noundef writeonly %0
; CHECK: loop
; CHECK: v128.load
; CHECK: v128.load
-; CHECK: i8x16.shuffle {{.*}} 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
+; CHECK: i8x16.shuffle 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
; CHECK: v128.load
; CHECK: v128.load
-; CHECK: i8x16.shuffle {{.*}} 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
+; CHECK: i8x16.shuffle 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
; CHECK: i16x8.extmul_high_i8x16_u
-; CHECK: i8x16.shuffle {{.*}} 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31
-; CHECK: i8x16.shuffle {{.*}} 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31
+; CHECK: i8x16.shuffle 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31
+; CHECK: i8x16.shuffle 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31
; CHECK: i8x16.sub
-; CHECK: i8x16.shuffle {{.*}} 0, 24, 2, 25, 4, 26, 6, 27, 8, 28, 10, 29, 12, 30, 14, 31
+; CHECK: i8x16.shuffle 0, 24, 2, 25, 4, 26, 6, 27, 8, 28, 10, 29, 12, 30, 14, 31
; CHECK: v128.store
; CHECK: i16x8.extmul_low_i8x16_u
-; CHECK: i8x16.shuffle {{.*}} 0, 16, 2, 17, 4, 18, 6, 19, 8, 20, 10, 21, 12, 22, 14, 23
+; CHECK: i8x16.shuffle 0, 16, 2, 17, 4, 18, 6, 19, 8, 20, 10, 21, 12, 22, 14, 23
; CHECK: v128.store
define hidden void @two_bytes_vary_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) {
%5 = icmp eq i32 %3, 0
@@ -672,27 +672,27 @@ define hidden void @three_bytes_interleave_op(ptr noalias nocapture noundef writ
; CHECK: loop
; CHECK: v128.load
; CHECK: v128.load
-; CHECK: i8x16.shuffle {{.*}} 0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle 0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK: v128.load
; CHECK: v128.load
-; CHECK: i8x16.shuffle {{.*}} 0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle 0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK: v128.and
-; CHECK: i8x16.shuffle {{.*}} 1, 5, 9, 13, 17, 21, 25, 29, 0, 0, 0, 0, 0, 0, 0, 0
-; CHECK: i8x16.shuffle {{.*}} 1, 5, 9, 13, 17, 21, 25, 29, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle 1, 5, 9, 13, 17, 21, 25, 29, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle 1, 5, 9, 13, 17, 21, 25, 29, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK: v128.and
-; CHECK: i8x16.shuffle {{.*}} 4, 20, 0, 0, 5, 21, 0, 0, 6, 22, 0, 0, 7, 23, 0, 0
-; CHECK: i8x16.shuffle {{.*}} 2, 6, 10, 14, 18, 22, 26, 30, 0, 0, 0, 0, 0, 0, 0, 0
-; CHECK: i8x16.shuffle {{.*}} 2, 6, 10, 14, 18, 22, 26, 30, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle 4, 20, 0, 0, 5, 21, 0, 0, 6, 22, 0, 0, 7, 23, 0, 0
+; CHECK: i8x16.shuffle 2, 6, 10, 14, 18, 22, 26, 30, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle 2, 6, 10, 14, 18, 22, 26, 30, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK: v128.and
-; CHECK: i8x16.shuffle {{.*}} 3, 7, 11, 15, 19, 23, 27, 31, 0, 0, 0, 0, 0, 0, 0, 0
-; CHECK: i8x16.shuffle {{.*}} 3, 7, 11, 15, 19, 23, 27, 31, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle 3, 7, 11, 15, 19, 23, 27, 31, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle 3, 7, 11, 15, 19, 23, 27, 31, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK: v128.and
-; CHECK: i8x16.shuffle {{.*}} 0, 0, 4, 20, 0, 0, 5, 21, 0, 0, 6, 22, 0, 0, 7, 23
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 18, 19, 4, 5, 22, 23, 8, 9, 26, 27, 12, 13, 30, 31
+; CHECK: i8x16.shuffle 0, 0, 4, 20, 0, 0, 5, 21, 0, 0, 6, 22, 0, 0, 7, 23
+; CHECK: i8x16.shuffle 0, 1, 18, 19, 4, 5, 22, 23, 8, 9, 26, 27, 12, 13, 30, 31
; CHECK: v128.store
-; CHECK: i8x16.shuffle {{.*}} 0, 16, 0, 0, 1, 17, 0, 0, 2, 18, 0, 0, 3, 19, 0, 0
-; CHECK: i8x16.shuffle {{.*}} 0, 0, 0, 16, 0, 0, 1, 17, 0, 0, 2, 18, 0, 0, 3, 19
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 18, 19, 4, 5, 22, 23, 8, 9, 26, 27, 12, 13, 30, 31
+; CHECK: i8x16.shuffle 0, 16, 0, 0, 1, 17, 0, 0, 2, 18, 0, 0, 3, 19, 0, 0
+; CHECK: i8x16.shuffle 0, 0, 0, 16, 0, 0, 1, 17, 0, 0, 2, 18, 0, 0, 3, 19
+; CHECK: i8x16.shuffle 0, 1, 18, 19, 4, 5, 22, 23, 8, 9, 26, 27, 12, 13, 30, 31
; CHECK: v128.store
define hidden void @four_bytes_same_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) {
%5 = icmp eq i32 %3, 0
@@ -740,25 +740,25 @@ define hidden void @four_bytes_same_op(ptr noalias nocapture noundef writeonly %
; CHECK: loop
; CHECK: v128.load
; CHECK: v128.load
-; CHECK: i8x16.shuffle {{.*}}, 0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle 0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK: v128.load
; CHECK: v128.load
-; CHECK: i8x16.shuffle {{.*}}, 0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle 0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK: i16x8.extmul_low_i8x16_u
-; CHECK: i8x16.shuffle {{.*}}, 1, 5, 9, 13, 17, 21, 25, 29, 0, 0, 0, 0, 0, 0, 0, 0
-; CHECK: i8x16.shuffle {{.*}}, 1, 5, 9, 13, 17, 21, 25, 29, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle 1, 5, 9, 13, 17, 21, 25, 29, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle 1, 5, 9, 13, 17, 21, 25, 29, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK: i16x8.extmul_low_i8x16_u
-; CHECK: i8x16.shuffle {{.*}}, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
-; CHECK: i8x16.shuffle {{.*}}, 2, 6, 10, 14, 18, 22, 26, 30, 0, 0, 0, 0, 0, 0, 0, 0
-; CHECK: i8x16.shuffle {{.*}}, 2, 6, 10, 14, 18, 22, 26, 30, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
+; CHECK: i8x16.shuffle 2, 6, 10, 14, 18, 22, 26, 30, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle 2, 6, 10, 14, 18, 22, 26, 30, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK: i8x16.sub
-; CHECK: i8x16.shuffle {{.*}}, 3, 7, 11, 15, 19, 23, 27, 31, 0, 0, 0, 0, 0, 0, 0, 0
-; CHECK: i8x16.shuffle {{.*}}, 3, 7, 11, 15, 19, 23, 27, 31, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle 3, 7, 11, 15, 19, 23, 27, 31, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle 3, 7, 11, 15, 19, 23, 27, 31, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK: i8x16.sub
-; CHECK: i8x16.shuffle {{.*}}, 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23
-; CHECK: i8x16.shuffle {{.*}}, 4, 12, 20, 28, 5, 13, 21, 29, 6, 14, 22, 30, 7, 15, 23, 31
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23
+; CHECK: i8x16.shuffle 4, 12, 20, 28, 5, 13, 21, 29, 6, 14, 22, 30, 7, 15, 23, 31
; CHECK: v128.store
-; CHECK: i8x16.shuffle {{.*}}, 0, 8, 16, 24, 1, 9, 17, 25, 2, 10, 18, 26, 3, 11, 19, 27
+; CHECK: i8x16.shuffle 0, 8, 16, 24, 1, 9, 17, 25, 2, 10, 18, 26, 3, 11, 19, 27
; CHECK: v128.store
define hidden void @four_bytes_split_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) {
%5 = icmp eq i32 %3, 0
@@ -806,27 +806,27 @@ define hidden void @four_bytes_split_op(ptr noalias nocapture noundef writeonly
; CHECK: loop
; CHECK: v128.load
; CHECK: v128.load
-; CHECK: i8x16.shuffle {{.*}} 0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle 0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK: v128.load
; CHECK: v128.load
-; CHECK: i8x16.shuffle {{.*}} 0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle 0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK: i8x16.add
-; CHECK: i8x16.shuffle {{.*}} 1, 5, 9, 13, 17, 21, 25, 29, 0, 0, 0, 0, 0, 0, 0, 0
-; CHECK: i8x16.shuffle {{.*}} 1, 5, 9, 13, 17, 21, 25, 29, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle 1, 5, 9, 13, 17, 21, 25, 29, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle 1, 5, 9, 13, 17, 21, 25, 29, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK: i8x16.sub
-; CHECK: i8x16.shuffle {{.*}} 4, 20, 0, 0, 5, 21, 0, 0, 6, 22, 0, 0, 7, 23, 0, 0
-; CHECK: i8x16.shuffle {{.*}} 2, 6, 10, 14, 18, 22, 26, 30, 0, 0, 0, 0, 0, 0, 0, 0
-; CHECK: i8x16.shuffle {{.*}} 2, 6, 10, 14, 18, 22, 26, 30, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle 4, 20, 0, 0, 5, 21, 0, 0, 6, 22, 0, 0, 7, 23, 0, 0
+; CHECK: i8x16.shuffle 2, 6, 10, 14, 18, 22, 26, 30, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle 2, 6, 10, 14, 18, 22, 26, 30, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK: i8x16.add
-; CHECK: i8x16.shuffle {{.*}} 3, 7, 11, 15, 19, 23, 27, 31, 0, 0, 0, 0, 0, 0, 0, 0
-; CHECK: i8x16.shuffle {{.*}} 3, 7, 11, 15, 19, 23, 27, 31, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle 3, 7, 11, 15, 19, 23, 27, 31, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle 3, 7, 11, 15, 19, 23, 27, 31, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK: i8x16.sub
-; CHECK: i8x16.shuffle {{.*}} 0, 0, 4, 20, 0, 0, 5, 21, 0, 0, 6, 22, 0, 0, 7, 23
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 18, 19, 4, 5, 22, 23, 8, 9, 26, 27, 12, 13, 30, 31
+; CHECK: i8x16.shuffle 0, 0, 4, 20, 0, 0, 5, 21, 0, 0, 6, 22, 0, 0, 7, 23
+; CHECK: i8x16.shuffle 0, 1, 18, 19, 4, 5, 22, 23, 8, 9, 26, 27, 12, 13, 30, 31
; CHECK: v128.store
-; CHECK: i8x16.shuffle {{.*}} 0, 16, 0, 0, 1, 17, 0, 0, 2, 18, 0, 0, 3, 19, 0, 0
-; CHECK: i8x16.shuffle {{.*}} 0, 0, 0, 16, 0, 0, 1, 17, 0, 0, 2, 18, 0, 0, 3, 19
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 18, 19, 4, 5, 22, 23, 8, 9, 26, 27, 12, 13, 30, 31
+; CHECK: i8x16.shuffle 0, 16, 0, 0, 1, 17, 0, 0, 2, 18, 0, 0, 3, 19, 0, 0
+; CHECK: i8x16.shuffle 0, 0, 0, 16, 0, 0, 1, 17, 0, 0, 2, 18, 0, 0, 3, 19
+; CHECK: i8x16.shuffle 0, 1, 18, 19, 4, 5, 22, 23, 8, 9, 26, 27, 12, 13, 30, 31
; CHECK: v128.store
define hidden void @four_bytes_interleave_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) {
%5 = icmp eq i32 %3, 0
@@ -1272,45 +1272,45 @@ define hidden void @four_bytes_into_four_ints_same_op(ptr noalias nocapture noun
; CHECK-LABEL: four_bytes_into_four_ints_vary_op:
; CHECK: loop
; CHECK: v128.load
-; CHECK: i8x16.shuffle {{.*}} 0, 4, 8, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle 0, 4, 8, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK: i16x8.extend_low_i8x16_u
; CHECK: i32x4.extend_low_i16x8_u
; CHECK: v128.load
-; CHECK: i8x16.shuffle {{.*}} 0, 4, 8, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle 0, 4, 8, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK: i16x8.extend_low_i8x16_u
; CHECK: i32x4.extend_low_i16x8_u
; CHECK: i32x4.add
-; CHECK: i8x16.shuffle {{.*}} 1, 5, 9, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle 1, 5, 9, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK: i16x8.extend_low_i8x16_u
; CHECK: i32x4.extend_low_i16x8_u
-; CHECK: i8x16.shuffle {{.*}} 1, 5, 9, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle 1, 5, 9, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK: i16x8.extend_low_i8x16_u
; CHECK: i32x4.extend_low_i16x8_u
; CHECK: i32x4.sub
-; CHECK: i8x16.shuffle {{.*}} 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3
-; CHECK: i8x16.shuffle {{.*}} 2, 6, 10, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3
+; CHECK: i8x16.shuffle 2, 6, 10, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK: i16x8.extend_low_i8x16_u
-; CHECK: i8x16.shuffle {{.*}} 2, 6, 10, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle 2, 6, 10, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK: i16x8.extend_low_i8x16_u
; CHECK: i32x4.extmul_low_i16x8_u
; CHECK: v128.and
-; CHECK: i8x16.shuffle {{.*}} 3, 7, 11, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle 3, 7, 11, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK: i16x8.extend_low_i8x16_u
; CHECK: i32x4.extend_low_i16x8_u
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
; CHECK: v128.store
-; CHECK: i8x16.shuffle {{.*}} 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
; CHECK: v128.store
-; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
; CHECK: v128.store
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
; CHECK: v128.store
define hidden void @four_bytes_into_four_ints_vary_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) {
%5 = icmp eq i32 %3, 0
@@ -1365,7 +1365,7 @@ define hidden void @four_bytes_into_four_ints_vary_op(ptr noalias nocapture noun
; CHECK: loop
; CHECK: v128.load
; CHECK: v128.load
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29
+; CHECK: i8x16.shuffle 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29
; CHECK: v128.store
define hidden void @scale_uv_row_down2(ptr nocapture noundef readonly %0, i32 noundef %1, ptr nocapture noundef writeonly %2, i32 noundef %3) {
%5 = icmp sgt i32 %3, 0
@@ -1396,35 +1396,35 @@ define hidden void @scale_uv_row_down2(ptr nocapture noundef readonly %0, i32 no
; CHECK: loop
; CHECK: v128.load
; CHECK: v128.load
-; CHECK: i8x16.shuffle {{.*}} 0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle 0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK: i16x8.extend_low_i8x16_u
-; CHECK: i8x16.shuffle {{.*}} 2, 6, 10, 14, 18, 22, 26, 30, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle 2, 6, 10, 14, 18, 22, 26, 30, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK: i16x8.extend_low_i8x16_u
; CHECK: i16x8.add
; CHECK: v128.load
; CHECK: v128.load
-; CHECK: i8x16.shuffle {{.*}} 0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle 0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK: i16x8.extend_low_i8x16_u
; CHECK: i16x8.add
-; CHECK: i8x16.shuffle {{.*}} 2, 6, 10, 14, 18, 22, 26, 30, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle 2, 6, 10, 14, 18, 22, 26, 30, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK: i16x8.extend_low_i8x16_u
; CHECK: i16x8.add
; CHECK: i16x8.add
; CHECK: i16x8.shr_u
-; CHECK: i8x16.shuffle {{.*}} 1, 5, 9, 13, 17, 21, 25, 29, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle 1, 5, 9, 13, 17, 21, 25, 29, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK: i16x8.extend_low_i8x16_u
-; CHECK: i8x16.shuffle {{.*}} 3, 7, 11, 15, 19, 23, 27, 31, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle 3, 7, 11, 15, 19, 23, 27, 31, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK: i16x8.extend_low_i8x16_u
; CHECK: i16x8.add
-; CHECK: i8x16.shuffle {{.*}} 1, 5, 9, 13, 17, 21, 25, 29, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle 1, 5, 9, 13, 17, 21, 25, 29, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK: i16x8.extend_low_i8x16_u
; CHECK: i16x8.add
-; CHECK: i8x16.shuffle {{.*}} 3, 7, 11, 15, 19, 23, 27, 31, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle 3, 7, 11, 15, 19, 23, 27, 31, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK: i16x8.extend_low_i8x16_u
; CHECK: i16x8.add
; CHECK: i16x8.add
; CHECK: i16x8.shr_u
-; CHECK: i8x16.shuffle {{.*}} 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30
+; CHECK: i8x16.shuffle 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30
; CHECK: v128.store
define hidden void @scale_uv_row_down2_box(ptr nocapture noundef readonly %0, i32 noundef %1, ptr nocapture noundef writeonly %2, i32 noundef %3) {
%5 = icmp sgt i32 %3, 0
@@ -1492,13 +1492,13 @@ define hidden void @scale_uv_row_down2_box(ptr nocapture noundef readonly %0, i3
; CHECK: loop
; CHECK: v128.load
; CHECK: v128.load
-; CHECK: i8x16.shuffle {{.*}} 0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0
-; CHECK: i8x16.shuffle {{.*}} 2, 6, 10, 14, 18, 22, 26, 30, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle 0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle 2, 6, 10, 14, 18, 22, 26, 30, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK: i8x16.avgr_u
-; CHECK: i8x16.shuffle {{.*}} 1, 5, 9, 13, 17, 21, 25, 29, 0, 0, 0, 0, 0, 0, 0, 0
-; CHECK: i8x16.shuffle {{.*}} 3, 7, 11, 15, 19, 23, 27, 31, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle 1, 5, 9, 13, 17, 21, 25, 29, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle 3, 7, 11, 15, 19, 23, 27, 31, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK: i8x16.avgr_u
-; CHECK: i8x16.shuffle {{.*}} 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23
+; CHECK: i8x16.shuffle 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23
; CHECK: v128.store
define hidden void @scale_uv_row_down2_linear(ptr nocapture noundef readonly %0, i32 noundef %1, ptr nocapture noundef writeonly %2, i32 noundef %3) {
%5 = icmp sgt i32 %3, 0
@@ -1605,28 +1605,28 @@ for.body: ; preds = %entry, %for.body
; CHECK-LABEL: two_bytes_two_floats_same_op:
; CHECK: loop
; CHECK: v128.load64_zero
-; CHECK: i8x16.shuffle {{.*}} 0, 2, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle 0, 2, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK: i16x8.extend_low_i8x16_s
; CHECK: i32x4.extend_low_i16x8_s
; CHECK: f32x4.convert_i32x4_s
; CHECK: v128.load64_zero
-; CHECK: i8x16.shuffle {{.*}} 0, 2, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle 0, 2, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK: i16x8.extend_low_i8x16_s
; CHECK: i32x4.extend_low_i16x8_s
; CHECK: f32x4.convert_i32x4_s
; CHECK: f32x4.mul
-; CHECK: i8x16.shuffle {{.*}} 1, 3, 5, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle 1, 3, 5, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK: i16x8.extend_low_i8x16_s
; CHECK: i32x4.extend_low_i16x8_s
; CHECK: f32x4.convert_i32x4_s
-; CHECK: i8x16.shuffle {{.*}} 1, 3, 5, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle 1, 3, 5, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK: i16x8.extend_low_i8x16_s
; CHECK: i32x4.extend_low_i16x8_s
; CHECK: f32x4.convert_i32x4_s
; CHECK: f32x4.mul
-; CHECK: i8x16.shuffle {{.*}} 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31
; CHECK: v128.store
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23
; CHECK: v128.store
define hidden void @two_bytes_two_floats_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) {
entry:
@@ -1663,28 +1663,28 @@ for.body: ; preds = %entry, %for.body
; CHECK-LABEL: two_bytes_two_floats_vary_op:
; CHECK: v128.load64_zero
-; CHECK: i8x16.shuffle {{.*}} 0, 2, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle 0, 2, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK: i16x8.extend_low_i8x16_s
; CHECK: i32x4.extend_low_i16x8_s
; CHECK: f32x4.convert_i32x4_s
; CHECK: v128.load64_zero
-; CHECK: i8x16.shuffle {{.*}} 0, 2, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle 0, 2, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK: i16x8.extend_low_i8x16_s
; CHECK: i32x4.extend_low_i16x8_s
; CHECK: f32x4.convert_i32x4_s
; CHECK: f32x4.add
-; CHECK: i8x16.shuffle {{.*}} 1, 3, 5, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle 1, 3, 5, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK: i16x8.extend_low_i8x16_s
; CHECK: i32x4.extend_low_i16x8_s
; CHECK: f32x4.convert_i32x4_s
-; CHECK: i8x16.shuffle {{.*}} 1, 3, 5, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle 1, 3, 5, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK: i16x8.extend_low_i8x16_s
; CHECK: i32x4.extend_low_i16x8_s
; CHECK: f32x4.convert_i32x4_s
; CHECK: f32x4.sub
-; CHECK: i8x16.shuffle {{.*}} 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31
; CHECK: v128.store
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23
; CHECK: v128.store
define hidden void @two_bytes_two_floats_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) {
entry:
@@ -1723,38 +1723,24 @@ for.body: ; preds = %entry, %for.body
; CHECK: loop
; CHECK: v128.load
; CHECK: v128.load
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
; CHECK: v128.load
; CHECK: v128.load
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
; CHECK: f32x4.mul
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i8x16.splat
-; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31
-; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31
+; CHECK: i32x4.trunc_sat_f32x4_s
+; CHECK: v128.const 255, 255, 255, 255
+; CHECK: v128.and
+; CHECK: i16x8.narrow_i32x4_u
+; CHECK: i8x16.narrow_i16x8_u
+; CHECK: i8x16.shuffle 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31
; CHECK: f32x4.mul
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i8x16.replace_lane
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i8x16.replace_lane
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i8x16.replace_lane
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i8x16.replace_lane
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i8x16.replace_lane
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i8x16.replace_lane
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i8x16.replace_lane
+; CHECK: i32x4.trunc_sat_f32x4_s
+; CHECK: v128.and
+; CHECK: i16x8.narrow_i32x4_u
+; CHECK: i8x16.narrow_i16x8_u
+; CHECK: i8x16.shuffle 0, 16, 1, 17, 2, 18, 3, 19, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK: v128.store64_lane
define hidden void @two_floats_two_bytes_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) {
entry:
@@ -1791,38 +1777,24 @@ for.body: ; preds = %entry, %for.body
; CHECK: loop
; CHECK: v128.load
; CHECK: v128.load
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
; CHECK: v128.load
; CHECK: v128.load
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
; CHECK: f32x4.add
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i8x16.splat
-; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31
-; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31
+; CHECK: i32x4.trunc_sat_f32x4_s
+; CHECK: v128.const 255, 255, 255, 255
+; CHECK: v128.and
+; CHECK: i16x8.narrow_i32x4_u
+; CHECK: i8x16.narrow_i16x8_u
+; CHECK: i8x16.shuffle 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31
; CHECK: f32x4.sub
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i8x16.replace_lane
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i8x16.replace_lane
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i8x16.replace_lane
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i8x16.replace_lane
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i8x16.replace_lane
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i8x16.replace_lane
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i8x16.replace_lane
+; CHECK: i32x4.trunc_sat_f32x4_s
+; CHECK: v128.and
+; CHECK: i16x8.narrow_i32x4_u
+; CHECK: i8x16.narrow_i16x8_u
+; CHECK: i8x16.shuffle 0, 16, 1, 17, 2, 18, 3, 19, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK: v128.store64_lane
define hidden void @two_floats_two_bytes_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) {
entry:
@@ -1858,24 +1830,24 @@ for.body: ; preds = %entry, %for.body
; CHECK-LABEL: two_shorts_two_floats_same_op:
; CHECK: loop
; CHECK: v128.load
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i8x16.shuffle 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 0, 1, 0, 1, 0, 1
; CHECK: i32x4.extend_low_i16x8_s
; CHECK: f32x4.convert_i32x4_s
; CHECK: v128.load
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i8x16.shuffle 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 0, 1, 0, 1, 0, 1
; CHECK: i32x4.extend_low_i16x8_s
; CHECK: f32x4.convert_i32x4_s
; CHECK: f32x4.mul
-; CHECK: i8x16.shuffle {{.*}} 2, 3, 6, 7, 10, 11, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i8x16.shuffle 2, 3, 6, 7, 10, 11, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1
; CHECK: i32x4.extend_low_i16x8_s
; CHECK: f32x4.convert_i32x4_s
-; CHECK: i8x16.shuffle {{.*}} 2, 3, 6, 7, 10, 11, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i8x16.shuffle 2, 3, 6, 7, 10, 11, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1
; CHECK: i32x4.extend_low_i16x8_s
; CHECK: f32x4.convert_i32x4_s
; CHECK: f32x4.mul
-; CHECK: i8x16.shuffle {{.*}} 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31
; CHECK: v128.store
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23
; CHECK: v128.store
define hidden void @two_shorts_two_floats_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) {
entry:
@@ -1913,24 +1885,24 @@ for.body: ; preds = %entry, %for.body
; CHECK-LABEL: two_shorts_two_floats_vary_op:
; CHECK: loop
; CHECK: v128.load
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i8x16.shuffle 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 0, 1, 0, 1, 0, 1
; CHECK: i32x4.extend_low_i16x8_s
; CHECK: f32x4.convert_i32x4_s
; CHECK: v128.load
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i8x16.shuffle 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 0, 1, 0, 1, 0, 1
; CHECK: i32x4.extend_low_i16x8_s
; CHECK: f32x4.convert_i32x4_s
; CHECK: f32x4.add
-; CHECK: i8x16.shuffle {{.*}} 2, 3, 6, 7, 10, 11, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i8x16.shuffle 2, 3, 6, 7, 10, 11, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1
; CHECK: i32x4.extend_low_i16x8_s
; CHECK: f32x4.convert_i32x4_s
-; CHECK: i8x16.shuffle {{.*}} 2, 3, 6, 7, 10, 11, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i8x16.shuffle 2, 3, 6, 7, 10, 11, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1
; CHECK: i32x4.extend_low_i16x8_s
; CHECK: f32x4.convert_i32x4_s
; CHECK: f32x4.sub
-; CHECK: i8x16.shuffle {{.*}} 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31
; CHECK: v128.store
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23
; CHECK: v128.store
define hidden void @two_shorts_two_floats_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) {
entry:
@@ -1969,16 +1941,16 @@ for.body: ; preds = %entry, %for.body
; CHECK: loop
; CHECK: v128.load
; CHECK: v128.load
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
; CHECK: v128.load
; CHECK: v128.load
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
; CHECK: f32x4.mul
; CHECK: f32x4.extract_lane
; CHECK: i32.trunc_sat_f32_s
; CHECK: i16x8.splat
-; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31
-; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31
; CHECK: f32x4.mul
; CHECK: f32x4.extract_lane
; CHECK: i32.trunc_sat_f32_s
@@ -2037,16 +2009,16 @@ for.body: ; preds = %entry, %for.body
; CHECK: loop
; CHECK: v128.load
; CHECK: v128.load
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
; CHECK: v128.load
; CHECK: v128.load
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
; CHECK: f32x4.add
; CHECK: f32x4.extract_lane
; CHECK: i32.trunc_sat_f32_s
; CHECK: i16x8.splat
-; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31
-; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31
; CHECK: f32x4.sub
; CHECK: f32x4.extract_lane
; CHECK: i32.trunc_sat_f32_s
@@ -2198,58 +2170,58 @@ for.body: ; preds = %entry, %for.body
; CHECK-LABEL: four_bytes_four_floats_same_op:
; CHECK: loop
; CHECK: v128.load
-; CHECK: i8x16.shuffle {{.*}} 0, 4, 8, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle 0, 4, 8, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK: i16x8.extend_low_i8x16_s
; CHECK: i32x4.extend_low_i16x8_s
; CHECK: f32x4.convert_i32x4_s
; CHECK: v128.load
-; CHECK: i8x16.shuffle {{.*}} 0, 4, 8, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle 0, 4, 8, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK: i16x8.extend_low_i8x16_s
; CHECK: i32x4.extend_low_i16x8_s
; CHECK: f32x4.convert_i32x4_s
; CHECK: f32x4.mul
-; CHECK: i8x16.shuffle {{.*}} 1, 5, 9, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle 1, 5, 9, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK: i16x8.extend_low_i8x16_s
; CHECK: i32x4.extend_low_i16x8_s
; CHECK: f32x4.convert_i32x4_s
-; CHECK: i8x16.shuffle {{.*}} 1, 5, 9, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle 1, 5, 9, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK: i16x8.extend_low_i8x16_s
; CHECK: i32x4.extend_low_i16x8_s
; CHECK: f32x4.convert_i32x4_s
; CHECK: f32x4.mul
-; CHECK: i8x16.shuffle {{.*}} 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3
-; CHECK: i8x16.shuffle {{.*}} 2, 6, 10, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3
+; CHECK: i8x16.shuffle 2, 6, 10, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK: i16x8.extend_low_i8x16_s
; CHECK: i32x4.extend_low_i16x8_s
; CHECK: f32x4.convert_i32x4_s
-; CHECK: i8x16.shuffle {{.*}} 2, 6, 10, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle 2, 6, 10, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK: i16x8.extend_low_i8x16_s
; CHECK: i32x4.extend_low_i16x8_s
; CHECK: f32x4.convert_i32x4_s
; CHECK: f32x4.mul
-; CHECK: i8x16.shuffle {{.*}} 3, 7, 11, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle 3, 7, 11, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK: i16x8.extend_low_i8x16_s
; CHECK: i32x4.extend_low_i16x8_s
; CHECK: f32x4.convert_i32x4_s
-; CHECK: i8x16.shuffle {{.*}} 3, 7, 11, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle 3, 7, 11, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK: i16x8.extend_low_i8x16_s
; CHECK: i32x4.extend_low_i16x8_s
; CHECK: f32x4.convert_i32x4_s
; CHECK: f32x4.mul
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
; CHECK: v128.store
-; CHECK: i8x16.shuffle {{.*}} 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
; CHECK: v128.store
-; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
; CHECK: v128.store
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
; CHECK: v128.store
define hidden void @four_bytes_four_floats_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) {
entry:
@@ -2305,58 +2277,58 @@ for.body: ; preds = %entry, %for.body
; CHECK-LABEL: four_bytes_four_floats_vary_op:
; CHECK: loop
; CHECK: v128.load
-; CHECK: i8x16.shuffle {{.*}} 0, 4, 8, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle 0, 4, 8, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK: i16x8.extend_low_i8x16_s
; CHECK: i32x4.extend_low_i16x8_s
; CHECK: f32x4.convert_i32x4_s
; CHECK: v128.load
-; CHECK: i8x16.shuffle {{.*}} 0, 4, 8, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle 0, 4, 8, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK: i16x8.extend_low_i8x16_s
; CHECK: i32x4.extend_low_i16x8_s
; CHECK: f32x4.convert_i32x4_s
; CHECK: f32x4.mul
-; CHECK: i8x16.shuffle {{.*}} 1, 5, 9, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle 1, 5, 9, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK: i16x8.extend_low_i8x16_s
; CHECK: i32x4.extend_low_i16x8_s
; CHECK: f32x4.convert_i32x4_s
-; CHECK: i8x16.shuffle {{.*}} 1, 5, 9, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle 1, 5, 9, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK: i16x8.extend_low_i8x16_s
; CHECK: i32x4.extend_low_i16x8_s
; CHECK: f32x4.convert_i32x4_s
; CHECK: f32x4.add
-; CHECK: i8x16.shuffle {{.*}} 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3
-; CHECK: i8x16.shuffle {{.*}} 2, 6, 10, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3
+; CHECK: i8x16.shuffle 2, 6, 10, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK: i16x8.extend_low_i8x16_s
; CHECK: i32x4.extend_low_i16x8_s
; CHECK: f32x4.convert_i32x4_s
-; CHECK: i8x16.shuffle {{.*}} 2, 6, 10, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle 2, 6, 10, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK: i16x8.extend_low_i8x16_s
; CHECK: i32x4.extend_low_i16x8_s
; CHECK: f32x4.convert_i32x4_s
; CHECK: f32x4.div
-; CHECK: i8x16.shuffle {{.*}} 3, 7, 11, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle 3, 7, 11, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK: i16x8.extend_low_i8x16_s
; CHECK: i32x4.extend_low_i16x8_s
; CHECK: f32x4.convert_i32x4_s
-; CHECK: i8x16.shuffle {{.*}} 3, 7, 11, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle 3, 7, 11, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK: i16x8.extend_low_i8x16_s
; CHECK: i32x4.extend_low_i16x8_s
; CHECK: f32x4.convert_i32x4_s
; CHECK: f32x4.sub
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
; CHECK: v128.store
-; CHECK: i8x16.shuffle {{.*}} 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
; CHECK: v128.store
-; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
; CHECK: v128.store
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
; CHECK: v128.store
define hidden void @four_bytes_four_floats_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) {
entry:
@@ -2413,88 +2385,60 @@ for.body: ; preds = %entry, %for.body
; CHECK: loop
; CHECK: v128.load
; CHECK: v128.load
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3
; CHECK: v128.load
; CHECK: v128.load
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
; CHECK: v128.load
; CHECK: v128.load
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3
; CHECK: v128.load
; CHECK: v128.load
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
; CHECK: f32x4.mul
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i8x16.splat
-; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
-; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
+; CHECK: i32x4.trunc_sat_f32x4_s
+; CHECK: v128.const 255, 255, 255, 255
+; CHECK: v128.and
+; CHECK: i16x8.narrow_i32x4_u
+; CHECK: i8x16.narrow_i16x8_u
+; CHECK: i8x16.shuffle 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
; CHECK: f32x4.mul
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i8x16.replace_lane
-; CHECK: i8x16.shuffle {{.*}} 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
-; CHECK: i8x16.shuffle {{.*}} 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
+; CHECK: i32x4.trunc_sat_f32x4_s
+; CHECK: v128.and
+; CHECK: i16x8.narrow_i32x4_u
+; CHECK: i8x16.narrow_i16x8_u
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 16, 17, 18, 19, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
; CHECK: f32x4.mul
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i8x16.replace_lane
-; CHECK: i8x16.shuffle {{.*}} 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
-; CHECK: i8x16.shuffle {{.*}} 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
+; CHECK: i32x4.trunc_sat_f32x4_s
+; CHECK: v128.and
+; CHECK: i16x8.narrow_i32x4_u
+; CHECK: i8x16.narrow_i16x8_u
+; CHECK: i8x16.shuffle 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
; CHECK: f32x4.mul
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i8x16.replace_lane
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i8x16.replace_lane
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i8x16.replace_lane
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i8x16.replace_lane
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i8x16.replace_lane
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i8x16.replace_lane
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i8x16.replace_lane
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i8x16.replace_lane
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i8x16.replace_lane
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i8x16.replace_lane
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i8x16.replace_lane
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i8x16.replace_lane
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i8x16.replace_lane
+; CHECK: i32x4.trunc_sat_f32x4_s
+; CHECK: v128.and
+; CHECK: i16x8.narrow_i32x4_u
+; CHECK: i8x16.narrow_i16x8_u
+; CHECK: i8x16.shuffle 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 16, 17, 18, 19
+; CHECK: i8x16.shuffle 0, 4, 24, 28, 1, 5, 25, 29, 2, 6, 26, 30, 3, 7, 27, 31
; CHECK: v128.store
define hidden void @four_floats_four_bytes_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) {
entry:
@@ -2547,88 +2491,60 @@ for.body: ; preds = %entry, %for.body
; CHECK: loop
; CHECK: v128.load
; CHECK: v128.load
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3
; CHECK: v128.load
; CHECK: v128.load
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
; CHECK: v128.load
; CHECK: v128.load
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3
; CHECK: v128.load
; CHECK: v128.load
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
; CHECK: f32x4.mul
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i8x16.splat
-; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
-; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
+; CHECK: i32x4.trunc_sat_f32x4_s
+; CHECK: v128.const 255, 255, 255, 255
+; CHECK: v128.and
+; CHECK: i16x8.narrow_i32x4_u
+; CHECK: i8x16.narrow_i16x8_u
+; CHECK: i8x16.shuffle 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
; CHECK: f32x4.add
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i8x16.replace_lane
-; CHECK: i8x16.shuffle {{.*}} 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
-; CHECK: i8x16.shuffle {{.*}} 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
+; CHECK: i32x4.trunc_sat_f32x4_s
+; CHECK: v128.and
+; CHECK: i16x8.narrow_i32x4_u
+; CHECK: i8x16.narrow_i16x8_u
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 16, 17, 18, 19, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
; CHECK: f32x4.div
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i8x16.replace_lane
-; CHECK: i8x16.shuffle {{.*}} 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
-; CHECK: i8x16.shuffle {{.*}} 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
+; CHECK: i32x4.trunc_sat_f32x4_s
+; CHECK: v128.and
+; CHECK: i16x8.narrow_i32x4_u
+; CHECK: i8x16.narrow_i16x8_u
+; CHECK: i8x16.shuffle 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
; CHECK: f32x4.sub
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i8x16.replace_lane
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i8x16.replace_lane
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i8x16.replace_lane
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i8x16.replace_lane
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i8x16.replace_lane
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i8x16.replace_lane
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i8x16.replace_lane
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i8x16.replace_lane
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i8x16.replace_lane
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i8x16.replace_lane
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i8x16.replace_lane
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i8x16.replace_lane
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i8x16.replace_lane
+; CHECK: i32x4.trunc_sat_f32x4_s
+; CHECK: v128.and
+; CHECK: i16x8.narrow_i32x4_u
+; CHECK: i8x16.narrow_i16x8_u
+; CHECK: i8x16.shuffle 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 16, 17, 18, 19
+; CHECK: i8x16.shuffle 0, 4, 24, 28, 1, 5, 25, 29, 2, 6, 26, 30, 3, 7, 27, 31
; CHECK: v128.store
define hidden void @four_floats_four_bytes_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) {
entry:
@@ -2681,51 +2597,51 @@ for.body: ; preds = %entry, %for.body
; CHECK: loop
; CHECK: v128.load
; CHECK: v128.load
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 8, 9, 16, 17, 24, 25, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i8x16.shuffle 0, 1, 8, 9, 16, 17, 24, 25, 0, 1, 0, 1, 0, 1, 0, 1
; CHECK: i32x4.extend_low_i16x8_s
; CHECK: f32x4.convert_i32x4_s
; CHECK: v128.load
; CHECK: v128.load
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 8, 9, 16, 17, 24, 25, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i8x16.shuffle 0, 1, 8, 9, 16, 17, 24, 25, 0, 1, 0, 1, 0, 1, 0, 1
; CHECK: i32x4.extend_low_i16x8_s
; CHECK: f32x4.convert_i32x4_s
; CHECK: f32x4.mul
-; CHECK: i8x16.shuffle {{.*}} 2, 3, 10, 11, 18, 19, 26, 27, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i8x16.shuffle 2, 3, 10, 11, 18, 19, 26, 27, 0, 1, 0, 1, 0, 1, 0, 1
; CHECK: i32x4.extend_low_i16x8_s
; CHECK: f32x4.convert_i32x4_s
-; CHECK: i8x16.shuffle {{.*}} 2, 3, 10, 11, 18, 19, 26, 27, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i8x16.shuffle 2, 3, 10, 11, 18, 19, 26, 27, 0, 1, 0, 1, 0, 1, 0, 1
; CHECK: i32x4.extend_low_i16x8_s
; CHECK: f32x4.convert_i32x4_s
; CHECK: f32x4.mul
-; CHECK: i8x16.shuffle {{.*}} 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3
-; CHECK: i8x16.shuffle {{.*}} 4, 5, 12, 13, 20, 21, 28, 29, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i8x16.shuffle 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3
+; CHECK: i8x16.shuffle 4, 5, 12, 13, 20, 21, 28, 29, 0, 1, 0, 1, 0, 1, 0, 1
; CHECK: i32x4.extend_low_i16x8_s
; CHECK: f32x4.convert_i32x4_s
-; CHECK: i8x16.shuffle {{.*}} 4, 5, 12, 13, 20, 21, 28, 29, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i8x16.shuffle 4, 5, 12, 13, 20, 21, 28, 29, 0, 1, 0, 1, 0, 1, 0, 1
; CHECK: i32x4.extend_low_i16x8_s
; CHECK: f32x4.convert_i32x4_s
; CHECK: f32x4.mul
-; CHECK: i8x16.shuffle {{.*}} 6, 7, 14, 15, 22, 23, 30, 31, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i8x16.shuffle 6, 7, 14, 15, 22, 23, 30, 31, 0, 1, 0, 1, 0, 1, 0, 1
; CHECK: i32x4.extend_low_i16x8_s
; CHECK: f32x4.convert_i32x4_s
-; CHECK: i8x16.shuffle {{.*}} 6, 7, 14, 15, 22, 23, 30, 31, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i8x16.shuffle 6, 7, 14, 15, 22, 23, 30, 31, 0, 1, 0, 1, 0, 1, 0, 1
; CHECK: i32x4.extend_low_i16x8_s
; CHECK: f32x4.convert_i32x4_s
; CHECK: f32x4.mul
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
; CHECK: v128.store
-; CHECK: i8x16.shuffle {{.*}} 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
; CHECK: v128.store
-; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
; CHECK: v128.store
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
; CHECK: v128.store
define hidden void @four_shorts_four_floats_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) {
entry:
@@ -2782,47 +2698,47 @@ for.body: ; preds = %entry, %for.body
; CHECK: loop
; CHECK: v128.load
; CHECK: v128.load
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 8, 9, 16, 17, 24, 25, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i8x16.shuffle 0, 1, 8, 9, 16, 17, 24, 25, 0, 1, 0, 1, 0, 1, 0, 1
; CHECK: i32x4.extend_low_i16x8_s
; CHECK: f32x4.convert_i32x4_s
; CHECK: v128.load
; CHECK: v128.load
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 8, 9, 16, 17, 24, 25, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i8x16.shuffle 0, 1, 8, 9, 16, 17, 24, 25, 0, 1, 0, 1, 0, 1, 0, 1
; CHECK: i32x4.extend_low_i16x8_s
; CHECK: f32x4.convert_i32x4_s
; CHECK: f32x4.mul
-; CHECK: i8x16.shuffle {{.*}} 2, 3, 10, 11, 18, 19, 26, 27, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i8x16.shuffle 2, 3, 10, 11, 18, 19, 26, 27, 0, 1, 0, 1, 0, 1, 0, 1
; CHECK: i32x4.extend_low_i16x8_s
; CHECK: f32x4.convert_i32x4_s
-; CHECK: i8x16.shuffle {{.*}} 2, 3, 10, 11, 18, 19, 26, 27, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i8x16.shuffle 2, 3, 10, 11, 18, 19, 26, 27, 0, 1, 0, 1, 0, 1, 0, 1
; CHECK: i32x4.extend_low_i16x8_s
; CHECK: f32x4.convert_i32x4_s
; CHECK: f32x4.add
-; CHECK: i8x16.shuffle {{.*}} 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3
-; CHECK: i8x16.shuffle {{.*}} 4, 5, 12, 13, 20, 21, 28, 29, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i8x16.shuffle 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3
+; CHECK: i8x16.shuffle 4, 5, 12, 13, 20, 21, 28, 29, 0, 1, 0, 1, 0, 1, 0, 1
; CHECK: i32x4.extend_low_i16x8_s
; CHECK: f32x4.convert_i32x4_s
-; CHECK: i8x16.shuffle {{.*}} 4, 5, 12, 13, 20, 21, 28, 29, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i8x16.shuffle 4, 5, 12, 13, 20, 21, 28, 29, 0, 1, 0, 1, 0, 1, 0, 1
; CHECK: i32x4.extend_low_i16x8_s
; CHECK: f32x4.convert_i32x4_s
; CHECK: f32x4.div
-; CHECK: i8x16.shuffle {{.*}} 6, 7, 14, 15, 22, 23, 30, 31, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i8x16.shuffle 6, 7, 14, 15, 22, 23, 30, 31, 0, 1, 0, 1, 0, 1, 0, 1
; CHECK: i32x4.extend_low_i16x8_s
; CHECK: f32x4.convert_i32x4_s
-; CHECK: i8x16.shuffle {{.*}} 6, 7, 14, 15, 22, 23, 30, 31, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i8x16.shuffle 6, 7, 14, 15, 22, 23, 30, 31, 0, 1, 0, 1, 0, 1, 0, 1
; CHECK: i32x4.extend_low_i16x8_s
; CHECK: f32x4.convert_i32x4_s
; CHECK: f32x4.sub
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
; CHECK: v128.store
-; CHECK: i8x16.shuffle {{.*}} 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
; CHECK: v128.store
-; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
; CHECK: v128.store
define hidden void @four_shorts_four_floats_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) {
entry:
@@ -2879,48 +2795,48 @@ for.body: ; preds = %entry, %for.body
; CHECK: loop
; CHECK: v128.load
; CHECK: v128.load
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3
; CHECK: v128.load
; CHECK: v128.load
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
; CHECK: v128.load
; CHECK: v128.load
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3
; CHECK: v128.load
; CHECK: v128.load
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
; CHECK: f32x4.mul
; CHECK: f32x4.extract_lane
; CHECK: i32.trunc_sat_f32_s
; CHECK: i16x8.splat
-; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
-; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
; CHECK: f32x4.mul
; CHECK: f32x4.extract_lane
; CHECK: i32.trunc_sat_f32_s
; CHECK: i16x8.replace_lane
-; CHECK: i8x16.shuffle {{.*}} 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
-; CHECK: i8x16.shuffle {{.*}} 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
; CHECK: f32x4.mul
; CHECK: f32x4.extract_lane
; CHECK: i32.trunc_sat_f32_s
; CHECK: i16x8.replace_lane
-; CHECK: i8x16.shuffle {{.*}} 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
-; CHECK: i8x16.shuffle {{.*}} 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
; CHECK: f32x4.mul
; CHECK: f32x4.extract_lane
; CHECK: i32.trunc_sat_f32_s
@@ -3014,48 +2930,48 @@ for.body: ; preds = %entry, %for.body
; CHECK: loop
; CHECK: v128.load
; CHECK: v128.load
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3
; CHECK: v128.load
; CHECK: v128.load
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
; CHECK: v128.load
; CHECK: v128.load
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3
; CHECK: v128.load
; CHECK: v128.load
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
; CHECK: f32x4.mul
; CHECK: f32x4.extract_lane
; CHECK: i32.trunc_sat_f32_s
; CHECK: i16x8.splat
-; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
-; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
; CHECK: f32x4.add
; CHECK: f32x4.extract_lane
; CHECK: i32.trunc_sat_f32_s
; CHECK: i16x8.replace_lane
-; CHECK: i8x16.shuffle {{.*}} 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
-; CHECK: i8x16.shuffle {{.*}} 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
; CHECK: f32x4.div
; CHECK: f32x4.extract_lane
; CHECK: i32.trunc_sat_f32_s
; CHECK: i16x8.replace_lane
-; CHECK: i8x16.shuffle {{.*}} 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
-; CHECK: i8x16.shuffle {{.*}} 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31
-; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
; CHECK: f32x4.sub
; CHECK: f32x4.extract_lane
; CHECK: i32.trunc_sat_f32_s
>From bf15087bf14f1a8abdad4d28e06f5b73240c5ce3 Mon Sep 17 00:00:00 2001
From: Sam Parker <sam.parker at arm.com>
Date: Wed, 22 Oct 2025 16:23:09 +0100
Subject: [PATCH 2/5] check for f32
---
llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 2410aab4aa660..62c091ba32227 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -3601,14 +3601,17 @@ static SDValue performMulCombine(SDNode *N,
SDValue performConvertFPCombine(SDNode *N, SelectionDAG &DAG) {
SDLoc DL(N);
EVT OutVT = N->getValueType(0);
- if (N->getValueType(0) == MVT::v4i8) {
- // v4f32 -> v4i32
+ if (N->getValueType(0) == MVT::v4i8 &&
+ N->getOperand(0)->getValueType(0) == MVT::v4f32) {
+ // First, convert to i32.
EVT InVT = MVT::v4i32;
SDValue ToInt = DAG.getNode(N->getOpcode(), DL, InVT, N->getOperand(0));
APInt Mask = APInt::getLowBitsSet(InVT.getScalarSizeInBits(),
OutVT.getScalarSizeInBits());
+ // Mask out the top 3 MSBs.
SDValue Masked =
DAG.getNode(ISD::AND, DL, InVT, ToInt, DAG.getConstant(Mask, DL, InVT));
+ // Create a wide enough vector that we can use narrow: v16i32 -> v16i8.
SDValue BigFakeVector =
DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i32,
DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i32, Masked,
>From 1e34de64266785670ed375da35213e457ddd20a5 Mon Sep 17 00:00:00 2001
From: Sam Parker <sam.parker at arm.com>
Date: Thu, 23 Oct 2025 10:52:53 +0100
Subject: [PATCH 3/5] added v4i16 support
---
.../WebAssembly/WebAssemblyISelLowering.cpp | 32 +-
.../CodeGen/WebAssembly/memory-interleave.ll | 302 ++++++------------
2 files changed, 124 insertions(+), 210 deletions(-)
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 62c091ba32227..d2d2c50cfea97 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -3601,25 +3601,33 @@ static SDValue performMulCombine(SDNode *N,
SDValue performConvertFPCombine(SDNode *N, SelectionDAG &DAG) {
SDLoc DL(N);
EVT OutVT = N->getValueType(0);
- if (N->getValueType(0) == MVT::v4i8 &&
- N->getOperand(0)->getValueType(0) == MVT::v4f32) {
+ if (N->getOperand(0)->getValueType(0) == MVT::v4f32) {
// First, convert to i32.
EVT InVT = MVT::v4i32;
SDValue ToInt = DAG.getNode(N->getOpcode(), DL, InVT, N->getOperand(0));
APInt Mask = APInt::getLowBitsSet(InVT.getScalarSizeInBits(),
OutVT.getScalarSizeInBits());
- // Mask out the top 3 MSBs.
+ // Mask out the top MSBs.
SDValue Masked =
DAG.getNode(ISD::AND, DL, InVT, ToInt, DAG.getConstant(Mask, DL, InVT));
- // Create a wide enough vector that we can use narrow: v16i32 -> v16i8.
- SDValue BigFakeVector =
- DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i32,
- DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i32, Masked,
- DAG.getUNDEF(MVT::v4i32)),
- DAG.getUNDEF(MVT::v8i32));
- SDValue Trunc =
- truncateVectorWithNARROW(MVT::v16i8, BigFakeVector, DL, DAG);
- return DAG.getBitcast(OutVT, extractSubVector(Trunc, 0, DAG, DL, 32));
+
+ if (N->getValueType(0) == MVT::v4i8) {
+ // Create a wide enough vector that we can use narrow: v16i32 -> v16i8.
+ SDValue WideVector =
+ DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i32,
+ DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i32, Masked,
+ DAG.getUNDEF(MVT::v4i32)),
+ DAG.getUNDEF(MVT::v8i32));
+ SDValue Trunc = truncateVectorWithNARROW(MVT::v16i8, WideVector, DL, DAG);
+ return DAG.getBitcast(OutVT, extractSubVector(Trunc, 0, DAG, DL, 32));
+ }
+ if (N->getValueType(0) == MVT::v4i16) {
+ // Create a wide enough vector that we can use narrow: v8i32 -> v8i16.
+ SDValue WideVector = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i32,
+ Masked, DAG.getUNDEF(MVT::v4i32));
+ SDValue Trunc = truncateVectorWithNARROW(MVT::v8i16, WideVector, DL, DAG);
+ return DAG.getBitcast(OutVT, extractSubVector(Trunc, 0, DAG, DL, 64));
+ }
}
return SDValue();
}
diff --git a/llvm/test/CodeGen/WebAssembly/memory-interleave.ll b/llvm/test/CodeGen/WebAssembly/memory-interleave.ll
index 4097710cdee52..acc2a3ee7af14 100644
--- a/llvm/test/CodeGen/WebAssembly/memory-interleave.ll
+++ b/llvm/test/CodeGen/WebAssembly/memory-interleave.ll
@@ -1941,38 +1941,22 @@ for.body: ; preds = %entry, %for.body
; CHECK: loop
; CHECK: v128.load
; CHECK: v128.load
-; CHECK: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
; CHECK: v128.load
; CHECK: v128.load
-; CHECK: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
; CHECK: f32x4.mul
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i16x8.splat
-; CHECK: i8x16.shuffle 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31
-; CHECK: i8x16.shuffle 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31
+; CHECK: i32x4.trunc_sat_f32x4_s
+; CHECK: v128.const 65535, 65535, 65535, 65535
+; CHECK: v128.and
+; CHECK: i16x8.narrow_i32x4_u
+; CHECK: i8x16.shuffle 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31
; CHECK: f32x4.mul
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i16x8.replace_lane
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i16x8.replace_lane
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i16x8.replace_lane
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i16x8.replace_lane
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i16x8.replace_lane
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i16x8.replace_lane
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i16x8.replace_lane
+; CHECK: i32x4.trunc_sat_f32x4_s
+; CHECK: v128.and
+; CHECK: i16x8.narrow_i32x4_u
+; CHECK: i8x16.shuffle 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23
; CHECK: v128.store
define hidden void @two_floats_two_shorts_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) {
entry:
@@ -2014,33 +1998,17 @@ for.body: ; preds = %entry, %for.body
; CHECK: v128.load
; CHECK: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
; CHECK: f32x4.add
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i16x8.splat
-; CHECK: i8x16.shuffle 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31
-; CHECK: i8x16.shuffle 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31
+; CHECK: i32x4.trunc_sat_f32x4_s
+; CHECK: v128.const 65535, 65535, 65535, 65535
+; CHECK: v128.and
+; CHECK: i16x8.narrow_i32x4_u
+; CHECK: i8x16.shuffle 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31
; CHECK: f32x4.sub
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i16x8.replace_lane
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i16x8.replace_lane
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i16x8.replace_lane
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i16x8.replace_lane
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i16x8.replace_lane
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i16x8.replace_lane
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i16x8.replace_lane
+; CHECK: i32x4.trunc_sat_f32x4_s
+; CHECK: v128.and
+; CHECK: i16x8.narrow_i32x4_u
+; CHECK: i8x16.shuffle 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23
; CHECK: v128.store
define hidden void @two_floats_two_shorts_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) {
entry:
@@ -2795,89 +2763,58 @@ for.body: ; preds = %entry, %for.body
; CHECK: loop
; CHECK: v128.load
; CHECK: v128.load
-; CHECK: i8x16.shuffle 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3
; CHECK: v128.load
; CHECK: v128.load
-; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19
-; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
; CHECK: v128.load
; CHECK: v128.load
-; CHECK: i8x16.shuffle 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3
; CHECK: v128.load
; CHECK: v128.load
-; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19
-; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
; CHECK: f32x4.mul
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i16x8.splat
-; CHECK: i8x16.shuffle 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3
-; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23
-; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
-; CHECK: i8x16.shuffle 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3
-; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23
-; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
+; CHECK: i32x4.trunc_sat_f32x4_s
+; CHECK: v128.const 65535, 65535, 65535, 65535
+; CHECK: v128.and
+; CHECK: i16x8.narrow_i32x4_u
+; CHECK: i8x16.shuffle 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
; CHECK: f32x4.mul
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i16x8.replace_lane
-; CHECK: i8x16.shuffle 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3
-; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27
-; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
-; CHECK: i8x16.shuffle 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3
-; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27
-; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
+; CHECK: i32x4.trunc_sat_f32x4_s
+; CHECK: v128.and
+; CHECK: i16x8.narrow_i32x4_u
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23
+; CHECK: i8x16.shuffle 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
; CHECK: f32x4.mul
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i16x8.replace_lane
-; CHECK: i8x16.shuffle 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3
-; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31
-; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
-; CHECK: i8x16.shuffle 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3
-; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31
-; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
+; CHECK: i32x4.trunc_sat_f32x4_s
+; CHECK: v128.and
+; CHECK: i16x8.narrow_i32x4_u
+; CHECK: i8x16.shuffle 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
; CHECK: f32x4.mul
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i16x8.replace_lane
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i16x8.replace_lane
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i16x8.replace_lane
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i16x8.replace_lane
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i16x8.replace_lane
+; CHECK: i32x4.trunc_sat_f32x4_s
+; CHECK: v128.and
+; CHECK: i16x8.narrow_i32x4_u
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23
+; CHECK: i8x16.shuffle 4, 5, 12, 13, 20, 21, 28, 29, 6, 7, 14, 15, 22, 23, 30, 31
; CHECK: v128.store
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i16x8.splat
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i16x8.replace_lane
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i16x8.replace_lane
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i16x8.replace_lane
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i16x8.replace_lane
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i16x8.replace_lane
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i16x8.replace_lane
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i16x8.replace_lane
+; CHECK: i8x16.shuffle 0, 1, 8, 9, 16, 17, 24, 25, 2, 3, 10, 11, 18, 19, 26, 27
; CHECK: v128.store
define hidden void @four_floats_four_shorts_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) {
entry:
@@ -2930,89 +2867,58 @@ for.body: ; preds = %entry, %for.body
; CHECK: loop
; CHECK: v128.load
; CHECK: v128.load
-; CHECK: i8x16.shuffle 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3
; CHECK: v128.load
; CHECK: v128.load
-; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19
-; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
; CHECK: v128.load
; CHECK: v128.load
-; CHECK: i8x16.shuffle 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3
; CHECK: v128.load
; CHECK: v128.load
-; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19
-; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
; CHECK: f32x4.mul
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i16x8.splat
-; CHECK: i8x16.shuffle 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3
-; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23
-; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
-; CHECK: i8x16.shuffle 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3
-; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23
-; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
+; CHECK: i32x4.trunc_sat_f32x4_s
+; CHECK: v128.const 65535, 65535, 65535, 65535
+; CHECK: v128.and
+; CHECK: i16x8.narrow_i32x4_u
+; CHECK: i8x16.shuffle 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
; CHECK: f32x4.add
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i16x8.replace_lane
-; CHECK: i8x16.shuffle 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3
-; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27
-; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
-; CHECK: i8x16.shuffle 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3
-; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27
-; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
+; CHECK: i32x4.trunc_sat_f32x4_s
+; CHECK: v128.and
+; CHECK: i16x8.narrow_i32x4_u
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23
+; CHECK: i8x16.shuffle 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
; CHECK: f32x4.div
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i16x8.replace_lane
-; CHECK: i8x16.shuffle 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3
-; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31
-; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
-; CHECK: i8x16.shuffle 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3
-; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31
-; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
+; CHECK: i32x4.trunc_sat_f32x4_s
+; CHECK: v128.and
+; CHECK: i16x8.narrow_i32x4_u
+; CHECK: i8x16.shuffle 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
; CHECK: f32x4.sub
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i16x8.replace_lane
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i16x8.replace_lane
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i16x8.replace_lane
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i16x8.replace_lane
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i16x8.replace_lane
+; CHECK: i32x4.trunc_sat_f32x4_s
+; CHECK: v128.and
+; CHECK: i16x8.narrow_i32x4_u
+; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23
+; CHECK: i8x16.shuffle 4, 5, 12, 13, 20, 21, 28, 29, 6, 7, 14, 15, 22, 23, 30, 31
; CHECK: v128.store
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i16x8.splat
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i16x8.replace_lane
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i16x8.replace_lane
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i16x8.replace_lane
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i16x8.replace_lane
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i16x8.replace_lane
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i16x8.replace_lane
-; CHECK: f32x4.extract_lane
-; CHECK: i32.trunc_sat_f32_s
-; CHECK: i16x8.replace_lane
+; CHECK: i8x16.shuffle 0, 1, 8, 9, 16, 17, 24, 25, 2, 3, 10, 11, 18, 19, 26, 27
; CHECK: v128.store
define hidden void @four_floats_four_shorts_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) {
entry:
>From 63d73928aace13e0b10dd3509392a40652f9d911 Mon Sep 17 00:00:00 2001
From: Sam Parker <sam.parker at arm.com>
Date: Thu, 23 Oct 2025 12:07:05 +0100
Subject: [PATCH 4/5] made it generic to support not just v4 vectors
---
.../WebAssembly/WebAssemblyISelLowering.cpp | 80 ++--
.../CodeGen/WebAssembly/fpclamptosat_vec.ll | 342 ++++++++++++++++++
2 files changed, 394 insertions(+), 28 deletions(-)
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index d2d2c50cfea97..94f04ad8935d7 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -3598,36 +3598,60 @@ static SDValue performMulCombine(SDNode *N,
}
}
+SDValue DoubleVectorWidth(SDValue In, EVT InVT, unsigned RequiredNumElems,
+ SelectionDAG &DAG) {
+ SDLoc DL(In);
+ LLVMContext &Ctx = *DAG.getContext();
+ unsigned NumElems = InVT.getVectorNumElements() * 2;
+ EVT OutVT = EVT::getVectorVT(Ctx, MVT::i32, NumElems);
+ SDValue Concat =
+ DAG.getNode(ISD::CONCAT_VECTORS, DL, OutVT, In, DAG.getUNDEF(InVT));
+ if (NumElems < RequiredNumElems) {
+ return DoubleVectorWidth(Concat, OutVT, RequiredNumElems, DAG);
+ }
+ return Concat;
+}
+
SDValue performConvertFPCombine(SDNode *N, SelectionDAG &DAG) {
- SDLoc DL(N);
EVT OutVT = N->getValueType(0);
- if (N->getOperand(0)->getValueType(0) == MVT::v4f32) {
- // First, convert to i32.
- EVT InVT = MVT::v4i32;
- SDValue ToInt = DAG.getNode(N->getOpcode(), DL, InVT, N->getOperand(0));
- APInt Mask = APInt::getLowBitsSet(InVT.getScalarSizeInBits(),
- OutVT.getScalarSizeInBits());
- // Mask out the top MSBs.
- SDValue Masked =
- DAG.getNode(ISD::AND, DL, InVT, ToInt, DAG.getConstant(Mask, DL, InVT));
-
- if (N->getValueType(0) == MVT::v4i8) {
- // Create a wide enough vector that we can use narrow: v16i32 -> v16i8.
- SDValue WideVector =
- DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i32,
- DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i32, Masked,
- DAG.getUNDEF(MVT::v4i32)),
- DAG.getUNDEF(MVT::v8i32));
- SDValue Trunc = truncateVectorWithNARROW(MVT::v16i8, WideVector, DL, DAG);
- return DAG.getBitcast(OutVT, extractSubVector(Trunc, 0, DAG, DL, 32));
- }
- if (N->getValueType(0) == MVT::v4i16) {
- // Create a wide enough vector that we can use narrow: v8i32 -> v8i16.
- SDValue WideVector = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i32,
- Masked, DAG.getUNDEF(MVT::v4i32));
- SDValue Trunc = truncateVectorWithNARROW(MVT::v8i16, WideVector, DL, DAG);
- return DAG.getBitcast(OutVT, extractSubVector(Trunc, 0, DAG, DL, 64));
- }
+ if (!OutVT.isVector())
+ return SDValue();
+
+ EVT OutElTy = OutVT.getVectorElementType();
+ if (OutElTy != MVT::i8 && OutElTy != MVT::i16)
+ return SDValue();
+
+ unsigned NumElems = OutVT.getVectorNumElements();
+ if (!isPowerOf2_32(NumElems))
+ return SDValue();
+
+ EVT FPVT = N->getOperand(0)->getValueType(0);
+ if (FPVT.getVectorElementType() != MVT::f32)
+ return SDValue();
+
+ SDLoc DL(N);
+
+ // First, convert to i32.
+ LLVMContext &Ctx = *DAG.getContext();
+ EVT IntVT = EVT::getVectorVT(Ctx, MVT::i32, NumElems);
+ SDValue ToInt = DAG.getNode(N->getOpcode(), DL, IntVT, N->getOperand(0));
+ APInt Mask = APInt::getLowBitsSet(IntVT.getScalarSizeInBits(),
+ OutVT.getScalarSizeInBits());
+ // Mask out the top MSBs.
+ SDValue Masked =
+ DAG.getNode(ISD::AND, DL, IntVT, ToInt, DAG.getConstant(Mask, DL, IntVT));
+
+ if (OutVT.getSizeInBits() < 128) {
+ // Create a wide enough vector that we can use narrow.
+ EVT NarrowedVT = OutElTy == MVT::i8 ? MVT::v16i8 : MVT::v8i16;
+ unsigned NumRequiredElems = NarrowedVT.getVectorNumElements();
+ SDValue WideVector =
+ DoubleVectorWidth(Masked, IntVT, NumRequiredElems, DAG);
+ SDValue Trunc = truncateVectorWithNARROW(NarrowedVT, WideVector, DL, DAG);
+ return DAG.getBitcast(
+ OutVT, extractSubVector(Trunc, 0, DAG, DL, OutVT.getSizeInBits()));
+ } else {
+ return truncateVectorWithNARROW(OutVT, Masked, DL, DAG);
}
return SDValue();
}
diff --git a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll
index a8d37be404cf2..c44b3bb5a9968 100644
--- a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll
@@ -2808,6 +2808,348 @@ entry:
ret <4 x i32> %spec.store.select7
}
+define <2 x i8> @fptosi_v2f32_v2i8(<2 x float> %x) {
+; CHECK-LABEL: fptosi_v2f32_v2i8:
+; CHECK: .functype fptosi_v2f32_v2i8 (v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32x4.trunc_sat_f32x4_s
+; CHECK-NEXT: v128.const 255, 255, 255, 255
+; CHECK-NEXT: v128.and
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i16x8.narrow_i32x4_u
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i16x8.narrow_i32x4_u
+; CHECK-NEXT: i8x16.narrow_i16x8_u
+; CHECK-NEXT: # fallthrough-return
+ %conv = fptosi <2 x float> %x to <2 x i8>
+ ret <2 x i8> %conv
+}
+
+define <2 x i8> @fptoui_v2f32_v2i8(<2 x float> %x) {
+; CHECK-LABEL: fptoui_v2f32_v2i8:
+; CHECK: .functype fptoui_v2f32_v2i8 (v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32x4.trunc_sat_f32x4_u
+; CHECK-NEXT: v128.const 255, 255, 255, 255
+; CHECK-NEXT: v128.and
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i16x8.narrow_i32x4_u
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i16x8.narrow_i32x4_u
+; CHECK-NEXT: i8x16.narrow_i16x8_u
+; CHECK-NEXT: # fallthrough-return
+ %conv = fptoui <2 x float> %x to <2 x i8>
+ ret <2 x i8> %conv
+}
+
+define <2 x i16> @fptosi_v2f32_v2i16(<2 x float> %x) {
+; CHECK-LABEL: fptosi_v2f32_v2i16:
+; CHECK: .functype fptosi_v2f32_v2i16 (v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32x4.trunc_sat_f32x4_s
+; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535
+; CHECK-NEXT: v128.and
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i16x8.narrow_i32x4_u
+; CHECK-NEXT: # fallthrough-return
+ %conv = fptosi <2 x float> %x to <2 x i16>
+ ret <2 x i16> %conv
+}
+
+define <2 x i16> @fptoui_v2f32_v2i16(<2 x float> %x) {
+; CHECK-LABEL: fptoui_v2f32_v2i16:
+; CHECK: .functype fptoui_v2f32_v2i16 (v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32x4.trunc_sat_f32x4_u
+; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535
+; CHECK-NEXT: v128.and
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i16x8.narrow_i32x4_u
+; CHECK-NEXT: # fallthrough-return
+ %conv = fptoui <2 x float> %x to <2 x i16>
+ ret <2 x i16> %conv
+}
+
+define <4 x i8> @fptosi_v4f32_v4i8(<4 x float> %x) {
+; CHECK-LABEL: fptosi_v4f32_v4i8:
+; CHECK: .functype fptosi_v4f32_v4i8 (v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32x4.trunc_sat_f32x4_s
+; CHECK-NEXT: v128.const 255, 255, 255, 255
+; CHECK-NEXT: v128.and
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i16x8.narrow_i32x4_u
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i16x8.narrow_i32x4_u
+; CHECK-NEXT: i8x16.narrow_i16x8_u
+; CHECK-NEXT: # fallthrough-return
+ %conv = fptosi <4 x float> %x to <4 x i8>
+ ret <4 x i8> %conv
+}
+
+define <4 x i8> @fptoui_v4f32_v4i8(<4 x float> %x) {
+; CHECK-LABEL: fptoui_v4f32_v4i8:
+; CHECK: .functype fptoui_v4f32_v4i8 (v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32x4.trunc_sat_f32x4_u
+; CHECK-NEXT: v128.const 255, 255, 255, 255
+; CHECK-NEXT: v128.and
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i16x8.narrow_i32x4_u
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i16x8.narrow_i32x4_u
+; CHECK-NEXT: i8x16.narrow_i16x8_u
+; CHECK-NEXT: # fallthrough-return
+ %conv = fptoui <4 x float> %x to <4 x i8>
+ ret <4 x i8> %conv
+}
+
+define <4 x i16> @fptosi_v4f32_v4i16(<4 x float> %x) {
+; CHECK-LABEL: fptosi_v4f32_v4i16:
+; CHECK: .functype fptosi_v4f32_v4i16 (v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32x4.trunc_sat_f32x4_s
+; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535
+; CHECK-NEXT: v128.and
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i16x8.narrow_i32x4_u
+; CHECK-NEXT: # fallthrough-return
+ %conv = fptosi <4 x float> %x to <4 x i16>
+ ret <4 x i16> %conv
+}
+
+define <4 x i16> @fptoui_v4f32_v4i16(<4 x float> %x) {
+; CHECK-LABEL: fptoui_v4f32_v4i16:
+; CHECK: .functype fptoui_v4f32_v4i16 (v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32x4.trunc_sat_f32x4_u
+; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535
+; CHECK-NEXT: v128.and
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i16x8.narrow_i32x4_u
+; CHECK-NEXT: # fallthrough-return
+ %conv = fptoui <4 x float> %x to <4 x i16>
+ ret <4 x i16> %conv
+}
+
+define <8 x i8> @fptosi_v8f32_v8i8(<8 x float> %x) {
+; CHECK-LABEL: fptosi_v8f32_v8i8:
+; CHECK: .functype fptosi_v8f32_v8i8 (v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32x4.trunc_sat_f32x4_s
+; CHECK-NEXT: v128.const 255, 255, 255, 255
+; CHECK-NEXT: local.tee 0
+; CHECK-NEXT: v128.and
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32x4.trunc_sat_f32x4_s
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: v128.and
+; CHECK-NEXT: i16x8.narrow_i32x4_u
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i16x8.narrow_i32x4_u
+; CHECK-NEXT: i8x16.narrow_i16x8_u
+; CHECK-NEXT: # fallthrough-return
+ %conv = fptosi <8 x float> %x to <8 x i8>
+ ret <8 x i8> %conv
+}
+
+define <8 x i8> @fptoui_v8f32_v8i8(<8 x float> %x) {
+; CHECK-LABEL: fptoui_v8f32_v8i8:
+; CHECK: .functype fptoui_v8f32_v8i8 (v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32x4.trunc_sat_f32x4_u
+; CHECK-NEXT: v128.const 255, 255, 255, 255
+; CHECK-NEXT: local.tee 0
+; CHECK-NEXT: v128.and
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32x4.trunc_sat_f32x4_u
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: v128.and
+; CHECK-NEXT: i16x8.narrow_i32x4_u
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i16x8.narrow_i32x4_u
+; CHECK-NEXT: i8x16.narrow_i16x8_u
+; CHECK-NEXT: # fallthrough-return
+ %conv = fptoui <8 x float> %x to <8 x i8>
+ ret <8 x i8> %conv
+}
+
+define <8 x i16> @fptosi_v8f32_v8i16(<8 x float> %x) {
+; CHECK-LABEL: fptosi_v8f32_v8i16:
+; CHECK: .functype fptosi_v8f32_v8i16 (v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32x4.trunc_sat_f32x4_s
+; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535
+; CHECK-NEXT: local.tee 0
+; CHECK-NEXT: v128.and
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32x4.trunc_sat_f32x4_s
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: v128.and
+; CHECK-NEXT: i16x8.narrow_i32x4_u
+; CHECK-NEXT: # fallthrough-return
+ %conv = fptosi <8 x float> %x to <8 x i16>
+ ret <8 x i16> %conv
+}
+
+define <8 x i16> @fptoui_v8f32_v8i16(<8 x float> %x) {
+; CHECK-LABEL: fptoui_v8f32_v8i16:
+; CHECK: .functype fptoui_v8f32_v8i16 (v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32x4.trunc_sat_f32x4_u
+; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535
+; CHECK-NEXT: local.tee 0
+; CHECK-NEXT: v128.and
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32x4.trunc_sat_f32x4_u
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: v128.and
+; CHECK-NEXT: i16x8.narrow_i32x4_u
+; CHECK-NEXT: # fallthrough-return
+ %conv = fptoui <8 x float> %x to <8 x i16>
+ ret <8 x i16> %conv
+}
+
+define <16 x i8> @fptosi_v16f32_v16i8(<16 x float> %x) {
+; CHECK-LABEL: fptosi_v16f32_v16i8:
+; CHECK: .functype fptosi_v16f32_v16i8 (v128, v128, v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32x4.trunc_sat_f32x4_s
+; CHECK-NEXT: v128.const 255, 255, 255, 255
+; CHECK-NEXT: local.tee 0
+; CHECK-NEXT: v128.and
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32x4.trunc_sat_f32x4_s
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: v128.and
+; CHECK-NEXT: i16x8.narrow_i32x4_u
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i32x4.trunc_sat_f32x4_s
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: v128.and
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i32x4.trunc_sat_f32x4_s
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: v128.and
+; CHECK-NEXT: i16x8.narrow_i32x4_u
+; CHECK-NEXT: i8x16.narrow_i16x8_u
+; CHECK-NEXT: # fallthrough-return
+ %conv = fptosi <16 x float> %x to <16 x i8>
+ ret <16 x i8> %conv
+}
+
+define <16 x i8> @fptoui_v16f32_v16i8(<16 x float> %x) {
+; CHECK-LABEL: fptoui_v16f32_v16i8:
+; CHECK: .functype fptoui_v16f32_v16i8 (v128, v128, v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32x4.trunc_sat_f32x4_u
+; CHECK-NEXT: v128.const 255, 255, 255, 255
+; CHECK-NEXT: local.tee 0
+; CHECK-NEXT: v128.and
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32x4.trunc_sat_f32x4_u
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: v128.and
+; CHECK-NEXT: i16x8.narrow_i32x4_u
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i32x4.trunc_sat_f32x4_u
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: v128.and
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i32x4.trunc_sat_f32x4_u
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: v128.and
+; CHECK-NEXT: i16x8.narrow_i32x4_u
+; CHECK-NEXT: i8x16.narrow_i16x8_u
+; CHECK-NEXT: # fallthrough-return
+ %conv = fptoui <16 x float> %x to <16 x i8>
+ ret <16 x i8> %conv
+}
+
+define <16 x i16> @fptosi_v16f32_v16i16(<16 x float> %x) {
+; CHECK-LABEL: fptosi_v16f32_v16i16:
+; CHECK: .functype fptosi_v16f32_v16i16 (i32, v128, v128, v128, v128) -> ()
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i32x4.trunc_sat_f32x4_s
+; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535
+; CHECK-NEXT: local.tee 3
+; CHECK-NEXT: v128.and
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i32x4.trunc_sat_f32x4_s
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: v128.and
+; CHECK-NEXT: i16x8.narrow_i32x4_u
+; CHECK-NEXT: v128.store 16
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32x4.trunc_sat_f32x4_s
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: v128.and
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i32x4.trunc_sat_f32x4_s
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: v128.and
+; CHECK-NEXT: i16x8.narrow_i32x4_u
+; CHECK-NEXT: v128.store 0
+; CHECK-NEXT: # fallthrough-return
+ %conv = fptosi <16 x float> %x to <16 x i16>
+ ret <16 x i16> %conv
+}
+
+define <16 x i16> @fptoui_v16f32_v16i16(<16 x float> %x) {
+; CHECK-LABEL: fptoui_v16f32_v16i16:
+; CHECK: .functype fptoui_v16f32_v16i16 (i32, v128, v128, v128, v128) -> ()
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i32x4.trunc_sat_f32x4_u
+; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535
+; CHECK-NEXT: local.tee 3
+; CHECK-NEXT: v128.and
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i32x4.trunc_sat_f32x4_u
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: v128.and
+; CHECK-NEXT: i16x8.narrow_i32x4_u
+; CHECK-NEXT: v128.store 16
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32x4.trunc_sat_f32x4_u
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: v128.and
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i32x4.trunc_sat_f32x4_u
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: v128.and
+; CHECK-NEXT: i16x8.narrow_i32x4_u
+; CHECK-NEXT: v128.store 0
+; CHECK-NEXT: # fallthrough-return
+ %conv = fptoui <16 x float> %x to <16 x i16>
+ ret <16 x i16> %conv
+}
+
declare <2 x i32> @llvm.smin.v2i32(<2 x i32>, <2 x i32>)
declare <2 x i32> @llvm.smax.v2i32(<2 x i32>, <2 x i32>)
declare <2 x i32> @llvm.umin.v2i32(<2 x i32>, <2 x i32>)
>From 5283f2a7ef7b40ccc6b6d584c10382ebd4e89d47 Mon Sep 17 00:00:00 2001
From: Sam Parker <sam.parker at arm.com>
Date: Thu, 23 Oct 2025 13:42:17 +0100
Subject: [PATCH 5/5] removed unnecessary argument
---
.../lib/Target/WebAssembly/WebAssemblyISelLowering.cpp | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 94f04ad8935d7..16226e7d6a834 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -3598,16 +3598,17 @@ static SDValue performMulCombine(SDNode *N,
}
}
-SDValue DoubleVectorWidth(SDValue In, EVT InVT, unsigned RequiredNumElems,
+SDValue DoubleVectorWidth(SDValue In, unsigned RequiredNumElems,
SelectionDAG &DAG) {
SDLoc DL(In);
LLVMContext &Ctx = *DAG.getContext();
+ EVT InVT = In.getValueType();
unsigned NumElems = InVT.getVectorNumElements() * 2;
- EVT OutVT = EVT::getVectorVT(Ctx, MVT::i32, NumElems);
+ EVT OutVT = EVT::getVectorVT(Ctx, InVT.getVectorElementType(), NumElems);
SDValue Concat =
DAG.getNode(ISD::CONCAT_VECTORS, DL, OutVT, In, DAG.getUNDEF(InVT));
if (NumElems < RequiredNumElems) {
- return DoubleVectorWidth(Concat, OutVT, RequiredNumElems, DAG);
+ return DoubleVectorWidth(Concat, RequiredNumElems, DAG);
}
return Concat;
}
@@ -3645,8 +3646,7 @@ SDValue performConvertFPCombine(SDNode *N, SelectionDAG &DAG) {
// Create a wide enough vector that we can use narrow.
EVT NarrowedVT = OutElTy == MVT::i8 ? MVT::v16i8 : MVT::v8i16;
unsigned NumRequiredElems = NarrowedVT.getVectorNumElements();
- SDValue WideVector =
- DoubleVectorWidth(Masked, IntVT, NumRequiredElems, DAG);
+ SDValue WideVector = DoubleVectorWidth(Masked, NumRequiredElems, DAG);
SDValue Trunc = truncateVectorWithNARROW(NarrowedVT, WideVector, DL, DAG);
return DAG.getBitcast(
OutVT, extractSubVector(Trunc, 0, DAG, DL, OutVT.getSizeInBits()));
More information about the llvm-commits
mailing list