[llvm] 28557e8 - [WebAssembly] Improve codegen for shuffles with undefined lane indices
Thomas Lively via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 13 16:03:28 PDT 2022
Author: Fanchen Kong
Date: 2022-09-13T16:03:18-07:00
New Revision: 28557e8c9872a31a29cf0aca19e9d036caf7eff4
URL: https://github.com/llvm/llvm-project/commit/28557e8c9872a31a29cf0aca19e9d036caf7eff4
DIFF: https://github.com/llvm/llvm-project/commit/28557e8c9872a31a29cf0aca19e9d036caf7eff4.diff
LOG: [WebAssembly] Improve codegen for shuffles with undefined lane indices
For undefined lane indices, fill the mask with the byte indices {0..N-1} (where
N is the lane width in bytes) instead of zeros, so that the VM can further
reduce the byte shuffle to a cheaper word/dword shuffle.
Reviewed By: tlively, penzn
Differential Revision: https://reviews.llvm.org/D133473
Added:
Modified:
llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll
llvm/test/CodeGen/WebAssembly/simd-build-vector.ll
llvm/test/CodeGen/WebAssembly/simd-concat.ll
llvm/test/CodeGen/WebAssembly/simd-conversions.ll
llvm/test/CodeGen/WebAssembly/simd-extending.ll
llvm/test/CodeGen/WebAssembly/simd.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 7ace0ad5ce630..d654372b66504 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -2322,8 +2322,10 @@ WebAssemblyTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
// Expand mask indices to byte indices and materialize them as operands
for (int M : Mask) {
for (size_t J = 0; J < LaneBytes; ++J) {
- // Lower undefs (represented by -1 in mask) to zero
- uint64_t ByteIndex = M == -1 ? 0 : (uint64_t)M * LaneBytes + J;
+ // Lower undefs (represented by -1 in mask) to {0..J}, which use a
+ // whole lane of vector input, to allow further reduction at VM. E.g.
+ // match an 8x16 byte shuffle to an equivalent cheaper 32x4 shuffle.
+ uint64_t ByteIndex = M == -1 ? J : (uint64_t)M * LaneBytes + J;
Ops[OpIdx++] = DAG.getConstant(ByteIndex, DL, MVT::i32);
}
}
diff --git a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll
index c27da85cbf488..e7bd9b53541f1 100644
--- a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll
@@ -31,7 +31,7 @@ define <2 x i32> @stest_f64i32(<2 x double> %x) {
; CHECK-NEXT: i64x2.gt_s
; CHECK-NEXT: v128.bitselect
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 0, 1, 2, 3
; CHECK-NEXT: # fallthrough-return
entry:
%conv = fptosi <2 x double> %x to <2 x i64>
@@ -75,7 +75,7 @@ define <2 x i32> @utest_f64i32(<2 x double> %x) {
; CHECK-NEXT: i64x2.replace_lane 1
; CHECK-NEXT: v128.bitselect
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 0, 1, 2, 3
; CHECK-NEXT: # fallthrough-return
entry:
%conv = fptoui <2 x double> %x to <2 x i64>
@@ -113,7 +113,7 @@ define <2 x i32> @ustest_f64i32(<2 x double> %x) {
; CHECK-NEXT: i64x2.gt_s
; CHECK-NEXT: v128.bitselect
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 0, 1, 2, 3
; CHECK-NEXT: # fallthrough-return
entry:
%conv = fptosi <2 x double> %x to <2 x i64>
@@ -302,7 +302,7 @@ define <2 x i16> @stest_f64i16(<2 x double> %x) {
; CHECK-NEXT: v128.const -32768, -32768, 0, 0
; CHECK-NEXT: i32x4.max_s
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
; CHECK-NEXT: # fallthrough-return
entry:
%conv = fptosi <2 x double> %x to <2 x i32>
@@ -329,7 +329,7 @@ define <2 x i16> @utest_f64i16(<2 x double> %x) {
; CHECK-NEXT: v128.const 65535, 65535, 0, 0
; CHECK-NEXT: i32x4.min_u
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
; CHECK-NEXT: # fallthrough-return
entry:
%conv = fptoui <2 x double> %x to <2 x i32>
@@ -356,7 +356,7 @@ define <2 x i16> @ustest_f64i16(<2 x double> %x) {
; CHECK-NEXT: v128.const 0, 0, 0, 0
; CHECK-NEXT: i32x4.max_s
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
; CHECK-NEXT: # fallthrough-return
entry:
%conv = fptosi <2 x double> %x to <2 x i32>
@@ -379,7 +379,7 @@ define <4 x i16> @stest_f32i16(<4 x float> %x) {
; CHECK-NEXT: v128.const -32768, -32768, -32768, -32768
; CHECK-NEXT: i32x4.max_s
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 8, 9, 12, 13, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 0, 1, 0, 1, 0, 1
; CHECK-NEXT: # fallthrough-return
entry:
%conv = fptosi <4 x float> %x to <4 x i32>
@@ -400,7 +400,7 @@ define <4 x i16> @utest_f32i16(<4 x float> %x) {
; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535
; CHECK-NEXT: i32x4.min_u
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 8, 9, 12, 13, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 0, 1, 0, 1, 0, 1
; CHECK-NEXT: # fallthrough-return
entry:
%conv = fptoui <4 x float> %x to <4 x i32>
@@ -421,7 +421,7 @@ define <4 x i16> @ustest_f32i16(<4 x float> %x) {
; CHECK-NEXT: v128.const 0, 0, 0, 0
; CHECK-NEXT: i32x4.max_s
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 8, 9, 12, 13, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 0, 1, 0, 1, 0, 1
; CHECK-NEXT: # fallthrough-return
entry:
%conv = fptosi <4 x float> %x to <4 x i32>
@@ -1539,7 +1539,7 @@ define <2 x i32> @stest_f64i32_mm(<2 x double> %x) {
; CHECK-NEXT: i64x2.gt_s
; CHECK-NEXT: v128.bitselect
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 0, 1, 2, 3
; CHECK-NEXT: # fallthrough-return
entry:
%conv = fptosi <2 x double> %x to <2 x i64>
@@ -1581,7 +1581,7 @@ define <2 x i32> @utest_f64i32_mm(<2 x double> %x) {
; CHECK-NEXT: i64x2.replace_lane 1
; CHECK-NEXT: v128.bitselect
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 0, 1, 2, 3
; CHECK-NEXT: # fallthrough-return
entry:
%conv = fptoui <2 x double> %x to <2 x i64>
@@ -1618,7 +1618,7 @@ define <2 x i32> @ustest_f64i32_mm(<2 x double> %x) {
; CHECK-NEXT: i64x2.gt_s
; CHECK-NEXT: v128.bitselect
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 0, 1, 2, 3
; CHECK-NEXT: # fallthrough-return
entry:
%conv = fptosi <2 x double> %x to <2 x i64>
@@ -1795,7 +1795,7 @@ define <2 x i16> @stest_f64i16_mm(<2 x double> %x) {
; CHECK-NEXT: v128.const -32768, -32768, 0, 0
; CHECK-NEXT: i32x4.max_s
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
; CHECK-NEXT: # fallthrough-return
entry:
%conv = fptosi <2 x double> %x to <2 x i32>
@@ -1820,7 +1820,7 @@ define <2 x i16> @utest_f64i16_mm(<2 x double> %x) {
; CHECK-NEXT: v128.const 65535, 65535, 0, 0
; CHECK-NEXT: i32x4.min_u
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
; CHECK-NEXT: # fallthrough-return
entry:
%conv = fptoui <2 x double> %x to <2 x i32>
@@ -1846,7 +1846,7 @@ define <2 x i16> @ustest_f64i16_mm(<2 x double> %x) {
; CHECK-NEXT: v128.const 0, 0, 0, 0
; CHECK-NEXT: i32x4.max_s
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
; CHECK-NEXT: # fallthrough-return
entry:
%conv = fptosi <2 x double> %x to <2 x i32>
@@ -1867,7 +1867,7 @@ define <4 x i16> @stest_f32i16_mm(<4 x float> %x) {
; CHECK-NEXT: v128.const -32768, -32768, -32768, -32768
; CHECK-NEXT: i32x4.max_s
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 8, 9, 12, 13, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 0, 1, 0, 1, 0, 1
; CHECK-NEXT: # fallthrough-return
entry:
%conv = fptosi <4 x float> %x to <4 x i32>
@@ -1886,7 +1886,7 @@ define <4 x i16> @utest_f32i16_mm(<4 x float> %x) {
; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535
; CHECK-NEXT: i32x4.min_u
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 8, 9, 12, 13, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 0, 1, 0, 1, 0, 1
; CHECK-NEXT: # fallthrough-return
entry:
%conv = fptoui <4 x float> %x to <4 x i32>
@@ -1906,7 +1906,7 @@ define <4 x i16> @ustest_f32i16_mm(<4 x float> %x) {
; CHECK-NEXT: v128.const 0, 0, 0, 0
; CHECK-NEXT: i32x4.max_s
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 8, 9, 12, 13, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 0, 1, 0, 1, 0, 1
; CHECK-NEXT: # fallthrough-return
entry:
%conv = fptosi <4 x float> %x to <4 x i32>
diff --git a/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll b/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll
index d759aa18f80e7..9be4176ab6beb 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll
@@ -190,7 +190,7 @@ define <4 x i32> @half_shuffle_i32x4(<4 x i32> %src) {
; CHECK-LABEL: half_shuffle_i32x4:
; CHECK: .functype half_shuffle_i32x4 (v128) -> (v128)
; CHECK-NEXT: # %bb.0:
-; CHECK-NEXT: i8x16.shuffle $push0=, $0, $0, 0, 0, 0, 0, 8, 9, 10, 11, 0, 1, 2, 3, 0, 0, 0, 0
+; CHECK-NEXT: i8x16.shuffle $push0=, $0, $0, 0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 0, 1, 2, 3
; CHECK-NEXT: i32.const $push1=, 0
; CHECK-NEXT: i32x4.replace_lane $push2=, $pop0, 0, $pop1
; CHECK-NEXT: i32.const $push3=, 3
diff --git a/llvm/test/CodeGen/WebAssembly/simd-concat.ll b/llvm/test/CodeGen/WebAssembly/simd-concat.ll
index ec7fc9836c3ef..42ded8a47c199 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-concat.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-concat.ll
@@ -59,7 +59,7 @@ define <4 x i16> @concat_v2i16(<2 x i16> %a, <2 x i16> %b) {
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
-; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 16, 17, 18, 19, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 0, 1, 0, 1, 0, 1
; CHECK-NEXT: # fallthrough-return
%v = shufflevector <2 x i16> %a, <2 x i16> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
ret <4 x i16> %v
diff --git a/llvm/test/CodeGen/WebAssembly/simd-conversions.ll b/llvm/test/CodeGen/WebAssembly/simd-conversions.ll
index 6e12d8aed21b1..f945b8e061717 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-conversions.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-conversions.ll
@@ -314,7 +314,7 @@ define <4 x double> @convert_low_s_v4f64(<8 x i32> %x) {
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 1
-; CHECK-NEXT: i8x16.shuffle 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: i8x16.shuffle 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
; CHECK-NEXT: f64x2.convert_low_i32x4_s
; CHECK-NEXT: v128.store 16
; CHECK-NEXT: local.get 0
@@ -334,7 +334,7 @@ define <4 x double> @convert_low_u_v4f64(<8 x i32> %x) {
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 1
-; CHECK-NEXT: i8x16.shuffle 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: i8x16.shuffle 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
; CHECK-NEXT: f64x2.convert_low_i32x4_u
; CHECK-NEXT: v128.store 16
; CHECK-NEXT: local.get 0
@@ -355,7 +355,7 @@ define <4 x double> @convert_low_s_v4f64_2(<8 x i32> %x) {
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 1
-; CHECK-NEXT: i8x16.shuffle 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: i8x16.shuffle 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
; CHECK-NEXT: f64x2.convert_low_i32x4_s
; CHECK-NEXT: v128.store 16
; CHECK-NEXT: local.get 0
@@ -375,7 +375,7 @@ define <4 x double> @convert_low_u_v4f64_2(<8 x i32> %x) {
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 1
-; CHECK-NEXT: i8x16.shuffle 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: i8x16.shuffle 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
; CHECK-NEXT: f64x2.convert_low_i32x4_u
; CHECK-NEXT: v128.store 16
; CHECK-NEXT: local.get 0
@@ -395,7 +395,7 @@ define <4 x double> @promote_low_v4f64(<8 x float> %x) {
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 1
-; CHECK-NEXT: i8x16.shuffle 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: i8x16.shuffle 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
; CHECK-NEXT: f64x2.promote_low_f32x4
; CHECK-NEXT: v128.store 16
; CHECK-NEXT: local.get 0
@@ -415,7 +415,7 @@ define <4 x double> @promote_low_v4f64_2(<8 x float> %x) {
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 1
-; CHECK-NEXT: i8x16.shuffle 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: i8x16.shuffle 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
; CHECK-NEXT: f64x2.promote_low_f32x4
; CHECK-NEXT: v128.store 16
; CHECK-NEXT: local.get 0
@@ -434,7 +434,7 @@ define <2 x double> @promote_mixed_v2f64(<4 x float> %x, <4 x float> %y) {
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
-; CHECK-NEXT: i8x16.shuffle 8, 9, 10, 11, 28, 29, 30, 31, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: i8x16.shuffle 8, 9, 10, 11, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3
; CHECK-NEXT: f64x2.promote_low_f32x4
; CHECK-NEXT: # fallthrough-return
%v = shufflevector <4 x float> %x, <4 x float> %y, <2 x i32> <i32 2, i32 7>
diff --git a/llvm/test/CodeGen/WebAssembly/simd-extending.ll b/llvm/test/CodeGen/WebAssembly/simd-extending.ll
index 0caec7d0b0d0e..1f84e6485dac5 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-extending.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-extending.ll
@@ -188,7 +188,7 @@ define <4 x i32> @extend_lowish_i16x8_s(<8 x i16> %v) {
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i8x16.shuffle 2, 3, 0, 0, 4, 5, 0, 0, 6, 7, 0, 0, 8, 9, 0, 0
+; CHECK-NEXT: i8x16.shuffle 2, 3, 0, 1, 4, 5, 0, 1, 6, 7, 0, 1, 8, 9, 0, 1
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32x4.shl
; CHECK-NEXT: i32.const 16
diff --git a/llvm/test/CodeGen/WebAssembly/simd.ll b/llvm/test/CodeGen/WebAssembly/simd.ll
index 43d854e8cdd30..723beb4166114 100644
--- a/llvm/test/CodeGen/WebAssembly/simd.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd.ll
@@ -483,7 +483,7 @@ define <8 x i16> @shuffle_v8i16(<8 x i16> %x, <8 x i16> %y) {
; NO-SIMD128-NOT: i8x16
; SIMD128-NEXT: .functype shuffle_undef_v8i16 (v128, v128) -> (v128){{$}}
; SIMD128-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $0, $0,
-; SIMD128-SAME: 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0{{$}}
+; SIMD128-SAME: 2, 3, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i16> @shuffle_undef_v8i16(<8 x i16> %x, <8 x i16> %y) {
%res = shufflevector <8 x i16> %x, <8 x i16> %y,
@@ -644,7 +644,7 @@ define <4 x i32> @shuffle_v4i32(<4 x i32> %x, <4 x i32> %y) {
; NO-SIMD128-NOT: i8x16
; SIMD128-NEXT: .functype shuffle_undef_v4i32 (v128, v128) -> (v128){{$}}
; SIMD128-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $0, $0,
-; SIMD128-SAME: 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0{{$}}
+; SIMD128-SAME: 4, 5, 6, 7, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i32> @shuffle_undef_v4i32(<4 x i32> %x, <4 x i32> %y) {
%res = shufflevector <4 x i32> %x, <4 x i32> %y,
@@ -793,7 +793,7 @@ define <2 x i64> @shuffle_v2i64(<2 x i64> %x, <2 x i64> %y) {
; NO-SIMD128-NOT: i8x16
; SIMD128-NEXT: .functype shuffle_undef_v2i64 (v128, v128) -> (v128){{$}}
; SIMD128-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $0, $0,
-; SIMD128-SAME: 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0{{$}}
+; SIMD128-SAME: 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i64> @shuffle_undef_v2i64(<2 x i64> %x, <2 x i64> %y) {
%res = shufflevector <2 x i64> %x, <2 x i64> %y,
@@ -942,7 +942,7 @@ define <4 x float> @shuffle_v4f32(<4 x float> %x, <4 x float> %y) {
; NO-SIMD128-NOT: i8x16
; SIMD128-NEXT: .functype shuffle_undef_v4f32 (v128, v128) -> (v128){{$}}
; SIMD128-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $0, $0,
-; SIMD128-SAME: 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0{{$}}
+; SIMD128-SAME: 4, 5, 6, 7, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x float> @shuffle_undef_v4f32(<4 x float> %x, <4 x float> %y) {
%res = shufflevector <4 x float> %x, <4 x float> %y,
@@ -1092,7 +1092,7 @@ define <2 x double> @shuffle_v2f64(<2 x double> %x, <2 x double> %y) {
; NO-SIMD128-NOT: i8x16
; SIMD128-NEXT: .functype shuffle_undef_v2f64 (v128, v128) -> (v128){{$}}
; SIMD128-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $0, $0,
-; SIMD128-SAME: 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0{{$}}
+; SIMD128-SAME: 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x double> @shuffle_undef_v2f64(<2 x double> %x, <2 x double> %y) {
%res = shufflevector <2 x double> %x, <2 x double> %y,
More information about the llvm-commits mailing list