[llvm] f841ad3 - [WebAssembly] Replace LOAD_SPLAT with SPLAT_VECTOR
Luke Lau via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 4 07:09:26 PST 2023
Author: Luke Lau
Date: 2023-01-04T15:07:47Z
New Revision: f841ad30d77eeb4c51663e68efefdb734c7a3d07
URL: https://github.com/llvm/llvm-project/commit/f841ad30d77eeb4c51663e68efefdb734c7a3d07
DIFF: https://github.com/llvm/llvm-project/commit/f841ad30d77eeb4c51663e68efefdb734c7a3d07.diff
LOG: [WebAssembly] Replace LOAD_SPLAT with SPLAT_VECTOR
Splats were selected by matching on uses of `build_vector` with
identical elements, but a while back a target-independent node for
vector splatting was added.
This removes the WebAssembly-specific LOAD_SPLAT intrinsic, and instead
makes SPLAT_VECTOR legal and adds patterns for splat loads.
Differential Revision: https://reviews.llvm.org/D139871
Added:
Modified:
llvm/lib/Target/WebAssembly/WebAssemblyISD.def
llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll
llvm/test/CodeGen/WebAssembly/simd-build-vector.ll
llvm/test/CodeGen/WebAssembly/simd-load-splat.ll
llvm/test/CodeGen/WebAssembly/simd-pr51605.ll
llvm/test/CodeGen/WebAssembly/simd-shift-complex-splats.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISD.def b/llvm/lib/Target/WebAssembly/WebAssemblyISD.def
index a3a33f4a5b3a3..b8954f4693f0a 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISD.def
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISD.def
@@ -46,7 +46,6 @@ HANDLE_NODETYPE(MEMORY_COPY)
HANDLE_NODETYPE(MEMORY_FILL)
// Memory intrinsics
-HANDLE_MEM_NODETYPE(LOAD_SPLAT)
HANDLE_MEM_NODETYPE(GLOBAL_GET)
HANDLE_MEM_NODETYPE(GLOBAL_SET)
HANDLE_MEM_NODETYPE(TABLE_GET)
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index eee707bcd66ed..f32efde459024 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -194,6 +194,11 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
MVT::v2f64})
setOperationAction(ISD::VECTOR_SHUFFLE, T, Custom);
+ // Support splatting
+ for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
+ MVT::v2f64})
+ setOperationAction(ISD::SPLAT_VECTOR, T, Legal);
+
// Custom lowering since wasm shifts must have a scalar shift amount
for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL})
for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
@@ -2161,18 +2166,8 @@ SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
return IsConstant(Lane);
};
} else {
- // Use a splat, but possibly a load_splat
- LoadSDNode *SplattedLoad;
- if ((SplattedLoad = dyn_cast<LoadSDNode>(SplatValue)) &&
- SplattedLoad->getMemoryVT() == VecT.getVectorElementType()) {
- Result = DAG.getMemIntrinsicNode(
- WebAssemblyISD::LOAD_SPLAT, DL, DAG.getVTList(VecT),
- {SplattedLoad->getChain(), SplattedLoad->getBasePtr(),
- SplattedLoad->getOffset()},
- SplattedLoad->getMemoryVT(), SplattedLoad->getMemOperand());
- } else {
- Result = DAG.getSplatBuildVector(VecT, DL, SplatValue);
- }
+ // Use a splat (which might be selected as a load splat)
+ Result = DAG.getSplatBuildVector(VecT, DL, SplatValue);
IsLaneConstructed = [&SplatValue](size_t _, const SDValue &Lane) {
return Lane == SplatValue;
};
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index ffd1f91a88288..ad2ec40b8b31b 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -54,15 +54,6 @@ def ImmI#SIZE : ImmLeaf<i32,
foreach SIZE = [2, 4, 8, 16, 32] in
def LaneIdx#SIZE : ImmLeaf<i32, "return 0 <= Imm && Imm < "#SIZE#";">;
-// Create vector with identical lanes: splat
-def splat2 : PatFrag<(ops node:$x), (build_vector $x, $x)>;
-def splat4 : PatFrag<(ops node:$x), (build_vector $x, $x, $x, $x)>;
-def splat8 : PatFrag<(ops node:$x), (build_vector $x, $x, $x, $x,
- $x, $x, $x, $x)>;
-def splat16 : PatFrag<(ops node:$x),
- (build_vector $x, $x, $x, $x, $x, $x, $x, $x,
- $x, $x, $x, $x, $x, $x, $x, $x)>;
-
class Vec {
ValueType vt;
ValueType int_vt;
@@ -70,6 +61,7 @@ class Vec {
WebAssemblyRegClass lane_rc;
int lane_bits;
ImmLeaf lane_idx;
+ SDPatternOperator lane_load;
PatFrag splat;
string prefix;
Vec split;
@@ -82,7 +74,8 @@ def I8x16 : Vec {
let lane_rc = I32;
let lane_bits = 8;
let lane_idx = LaneIdx16;
- let splat = splat16;
+ let lane_load = extloadi8;
+ let splat = PatFrag<(ops node:$x), (v16i8 (splat_vector (i8 $x)))>;
let prefix = "i8x16";
}
@@ -93,7 +86,8 @@ def I16x8 : Vec {
let lane_rc = I32;
let lane_bits = 16;
let lane_idx = LaneIdx8;
- let splat = splat8;
+ let lane_load = extloadi16;
+ let splat = PatFrag<(ops node:$x), (v8i16 (splat_vector (i16 $x)))>;
let prefix = "i16x8";
let split = I8x16;
}
@@ -105,7 +99,8 @@ def I32x4 : Vec {
let lane_rc = I32;
let lane_bits = 32;
let lane_idx = LaneIdx4;
- let splat = splat4;
+ let lane_load = load;
+ let splat = PatFrag<(ops node:$x), (v4i32 (splat_vector (i32 $x)))>;
let prefix = "i32x4";
let split = I16x8;
}
@@ -117,7 +112,8 @@ def I64x2 : Vec {
let lane_rc = I64;
let lane_bits = 64;
let lane_idx = LaneIdx2;
- let splat = splat2;
+ let lane_load = load;
+ let splat = PatFrag<(ops node:$x), (v2i64 (splat_vector (i64 $x)))>;
let prefix = "i64x2";
let split = I32x4;
}
@@ -129,7 +125,8 @@ def F32x4 : Vec {
let lane_rc = F32;
let lane_bits = 32;
let lane_idx = LaneIdx4;
- let splat = splat4;
+ let lane_load = load;
+ let splat = PatFrag<(ops node:$x), (v4f32 (splat_vector (f32 $x)))>;
let prefix = "f32x4";
}
@@ -140,7 +137,8 @@ def F64x2 : Vec {
let lane_rc = F64;
let lane_bits = 64;
let lane_idx = LaneIdx2;
- let splat = splat2;
+ let lane_load = load;
+ let splat = PatFrag<(ops node:$x), (v2f64 (splat_vector (f64 $x)))>;
let prefix = "f64x2";
}
@@ -195,14 +193,11 @@ defm "" : SIMDLoadSplat<16, 8>;
defm "" : SIMDLoadSplat<32, 9>;
defm "" : SIMDLoadSplat<64, 10>;
-def wasm_load_splat_t : SDTypeProfile<1, 1, [SDTCisPtrTy<1>]>;
-def wasm_load_splat : SDNode<"WebAssemblyISD::LOAD_SPLAT", wasm_load_splat_t,
- [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
-def load_splat : PatFrag<(ops node:$addr), (wasm_load_splat node:$addr)>;
-
foreach vec = AllVecs in {
-defvar inst = "LOAD"#vec.lane_bits#"_SPLAT";
-defm : LoadPat<vec.vt, load_splat, inst>;
+ defvar inst = "LOAD"#vec.lane_bits#"_SPLAT";
+ defm : LoadPat<vec.vt,
+ PatFrag<(ops node:$addr), (splat_vector (vec.lane_vt (vec.lane_load node:$addr)))>,
+ inst>;
}
// Load and extend
@@ -488,6 +483,17 @@ defm "" : ConstVec<F64x2,
(build_vector (f64 fpimm:$i0), (f64 fpimm:$i1)),
"$i0, $i1">;
+// Match splat(x) -> const.v128(x, ..., x)
+foreach vec = AllVecs in {
+ defvar numEls = !div(vec.vt.Size, vec.lane_bits);
+ defvar isFloat = !or(!eq(vec.lane_vt, f32), !eq(vec.lane_vt, f64));
+ defvar immKind = !if(isFloat, fpimm, imm);
+ def : Pat<(vec.splat (vec.lane_vt immKind:$x)),
+ !dag(!cast<NI>("CONST_V128_"#vec),
+ !listsplat((vec.lane_vt immKind:$x), numEls),
+ ?)>;
+}
+
// Shuffle lanes: shuffle
defm SHUFFLE :
SIMD_I<(outs V128:$dst),
diff --git a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll
index e7bd9b53541f1..007802dd0c035 100644
--- a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll
@@ -297,9 +297,9 @@ define <2 x i16> @stest_f64i16(<2 x double> %x) {
; CHECK-NEXT: f64x2.extract_lane 1
; CHECK-NEXT: i32.trunc_sat_f64_s
; CHECK-NEXT: i32x4.replace_lane 1
-; CHECK-NEXT: v128.const 32767, 32767, 0, 0
+; CHECK-NEXT: v128.const 32767, 32767, 32767, 32767
; CHECK-NEXT: i32x4.min_s
-; CHECK-NEXT: v128.const -32768, -32768, 0, 0
+; CHECK-NEXT: v128.const -32768, -32768, -32768, -32768
; CHECK-NEXT: i32x4.max_s
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
@@ -326,7 +326,7 @@ define <2 x i16> @utest_f64i16(<2 x double> %x) {
; CHECK-NEXT: f64x2.extract_lane 1
; CHECK-NEXT: i32.trunc_sat_f64_u
; CHECK-NEXT: i32x4.replace_lane 1
-; CHECK-NEXT: v128.const 65535, 65535, 0, 0
+; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535
; CHECK-NEXT: i32x4.min_u
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
@@ -351,7 +351,7 @@ define <2 x i16> @ustest_f64i16(<2 x double> %x) {
; CHECK-NEXT: f64x2.extract_lane 1
; CHECK-NEXT: i32.trunc_sat_f64_s
; CHECK-NEXT: i32x4.replace_lane 1
-; CHECK-NEXT: v128.const 65535, 65535, 0, 0
+; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535
; CHECK-NEXT: i32x4.min_s
; CHECK-NEXT: v128.const 0, 0, 0, 0
; CHECK-NEXT: i32x4.max_s
@@ -1790,9 +1790,9 @@ define <2 x i16> @stest_f64i16_mm(<2 x double> %x) {
; CHECK-NEXT: f64x2.extract_lane 1
; CHECK-NEXT: i32.trunc_sat_f64_s
; CHECK-NEXT: i32x4.replace_lane 1
-; CHECK-NEXT: v128.const 32767, 32767, 0, 0
+; CHECK-NEXT: v128.const 32767, 32767, 32767, 32767
; CHECK-NEXT: i32x4.min_s
-; CHECK-NEXT: v128.const -32768, -32768, 0, 0
+; CHECK-NEXT: v128.const -32768, -32768, -32768, -32768
; CHECK-NEXT: i32x4.max_s
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
@@ -1817,7 +1817,7 @@ define <2 x i16> @utest_f64i16_mm(<2 x double> %x) {
; CHECK-NEXT: f64x2.extract_lane 1
; CHECK-NEXT: i32.trunc_sat_f64_u
; CHECK-NEXT: i32x4.replace_lane 1
-; CHECK-NEXT: v128.const 65535, 65535, 0, 0
+; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535
; CHECK-NEXT: i32x4.min_u
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
@@ -1841,7 +1841,7 @@ define <2 x i16> @ustest_f64i16_mm(<2 x double> %x) {
; CHECK-NEXT: f64x2.extract_lane 1
; CHECK-NEXT: i32.trunc_sat_f64_s
; CHECK-NEXT: i32x4.replace_lane 1
-; CHECK-NEXT: v128.const 65535, 65535, 0, 0
+; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535
; CHECK-NEXT: i32x4.min_s
; CHECK-NEXT: v128.const 0, 0, 0, 0
; CHECK-NEXT: i32x4.max_s
diff --git a/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll b/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll
index 1cc05fcf80f15..a51b358de2e89 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll
@@ -118,107 +118,92 @@ define <16 x i8> @swizzle_all_i8x16(<16 x i8> %src, <16 x i8> %mask) {
; CHECK-LABEL: swizzle_all_i8x16:
; CHECK: .functype swizzle_all_i8x16 (v128, v128) -> (v128)
; CHECK-NEXT: # %bb.0:
-; CHECK-NEXT: global.get $push80=, __stack_pointer
-; CHECK-NEXT: i32.const $push81=, 16
-; CHECK-NEXT: i32.sub $push98=, $pop80, $pop81
-; CHECK-NEXT: local.tee $push97=, $2=, $pop98
-; CHECK-NEXT: v128.store 0($pop97), $0
-; CHECK-NEXT: i8x16.extract_lane_u $push61=, $1, 0
+; CHECK-NEXT: global.get $push65=, __stack_pointer
+; CHECK-NEXT: i32.const $push66=, 16
+; CHECK-NEXT: i32.sub $push83=, $pop65, $pop66
+; CHECK-NEXT: local.tee $push82=, $2=, $pop83
+; CHECK-NEXT: v128.store 0($pop82), $0
+; CHECK-NEXT: i8x16.extract_lane_u $push61=, $1, 15
; CHECK-NEXT: i32.const $push1=, 15
; CHECK-NEXT: i32.and $push62=, $pop61, $pop1
; CHECK-NEXT: i32.or $push63=, $2, $pop62
-; CHECK-NEXT: v128.load8_splat $push64=, 0($pop63)
-; CHECK-NEXT: i8x16.extract_lane_u $push57=, $1, 1
-; CHECK-NEXT: i32.const $push96=, 15
-; CHECK-NEXT: i32.and $push58=, $pop57, $pop96
+; CHECK-NEXT: i8x16.extract_lane_u $push57=, $1, 14
+; CHECK-NEXT: i32.const $push81=, 15
+; CHECK-NEXT: i32.and $push58=, $pop57, $pop81
; CHECK-NEXT: i32.or $push59=, $2, $pop58
-; CHECK-NEXT: i32.load8_u $push60=, 0($pop59)
-; CHECK-NEXT: i8x16.replace_lane $push65=, $pop64, 1, $pop60
-; CHECK-NEXT: i8x16.extract_lane_u $push53=, $1, 2
-; CHECK-NEXT: i32.const $push95=, 15
-; CHECK-NEXT: i32.and $push54=, $pop53, $pop95
+; CHECK-NEXT: i8x16.extract_lane_u $push53=, $1, 13
+; CHECK-NEXT: i32.const $push80=, 15
+; CHECK-NEXT: i32.and $push54=, $pop53, $pop80
; CHECK-NEXT: i32.or $push55=, $2, $pop54
-; CHECK-NEXT: i32.load8_u $push56=, 0($pop55)
-; CHECK-NEXT: i8x16.replace_lane $push66=, $pop65, 2, $pop56
-; CHECK-NEXT: i8x16.extract_lane_u $push49=, $1, 3
-; CHECK-NEXT: i32.const $push94=, 15
-; CHECK-NEXT: i32.and $push50=, $pop49, $pop94
+; CHECK-NEXT: i8x16.extract_lane_u $push49=, $1, 12
+; CHECK-NEXT: i32.const $push79=, 15
+; CHECK-NEXT: i32.and $push50=, $pop49, $pop79
; CHECK-NEXT: i32.or $push51=, $2, $pop50
-; CHECK-NEXT: i32.load8_u $push52=, 0($pop51)
-; CHECK-NEXT: i8x16.replace_lane $push67=, $pop66, 3, $pop52
-; CHECK-NEXT: i8x16.extract_lane_u $push45=, $1, 4
-; CHECK-NEXT: i32.const $push93=, 15
-; CHECK-NEXT: i32.and $push46=, $pop45, $pop93
+; CHECK-NEXT: i8x16.extract_lane_u $push45=, $1, 11
+; CHECK-NEXT: i32.const $push78=, 15
+; CHECK-NEXT: i32.and $push46=, $pop45, $pop78
; CHECK-NEXT: i32.or $push47=, $2, $pop46
-; CHECK-NEXT: i32.load8_u $push48=, 0($pop47)
-; CHECK-NEXT: i8x16.replace_lane $push68=, $pop67, 4, $pop48
-; CHECK-NEXT: i8x16.extract_lane_u $push41=, $1, 5
-; CHECK-NEXT: i32.const $push92=, 15
-; CHECK-NEXT: i32.and $push42=, $pop41, $pop92
+; CHECK-NEXT: i8x16.extract_lane_u $push41=, $1, 10
+; CHECK-NEXT: i32.const $push77=, 15
+; CHECK-NEXT: i32.and $push42=, $pop41, $pop77
; CHECK-NEXT: i32.or $push43=, $2, $pop42
-; CHECK-NEXT: i32.load8_u $push44=, 0($pop43)
-; CHECK-NEXT: i8x16.replace_lane $push69=, $pop68, 5, $pop44
-; CHECK-NEXT: i8x16.extract_lane_u $push37=, $1, 6
-; CHECK-NEXT: i32.const $push91=, 15
-; CHECK-NEXT: i32.and $push38=, $pop37, $pop91
+; CHECK-NEXT: i8x16.extract_lane_u $push37=, $1, 9
+; CHECK-NEXT: i32.const $push76=, 15
+; CHECK-NEXT: i32.and $push38=, $pop37, $pop76
; CHECK-NEXT: i32.or $push39=, $2, $pop38
-; CHECK-NEXT: i32.load8_u $push40=, 0($pop39)
-; CHECK-NEXT: i8x16.replace_lane $push70=, $pop69, 6, $pop40
-; CHECK-NEXT: i8x16.extract_lane_u $push33=, $1, 7
-; CHECK-NEXT: i32.const $push90=, 15
-; CHECK-NEXT: i32.and $push34=, $pop33, $pop90
+; CHECK-NEXT: i8x16.extract_lane_u $push33=, $1, 8
+; CHECK-NEXT: i32.const $push75=, 15
+; CHECK-NEXT: i32.and $push34=, $pop33, $pop75
; CHECK-NEXT: i32.or $push35=, $2, $pop34
-; CHECK-NEXT: i32.load8_u $push36=, 0($pop35)
-; CHECK-NEXT: i8x16.replace_lane $push71=, $pop70, 7, $pop36
-; CHECK-NEXT: i8x16.extract_lane_u $push29=, $1, 8
-; CHECK-NEXT: i32.const $push89=, 15
-; CHECK-NEXT: i32.and $push30=, $pop29, $pop89
+; CHECK-NEXT: i8x16.extract_lane_u $push29=, $1, 7
+; CHECK-NEXT: i32.const $push74=, 15
+; CHECK-NEXT: i32.and $push30=, $pop29, $pop74
; CHECK-NEXT: i32.or $push31=, $2, $pop30
-; CHECK-NEXT: i32.load8_u $push32=, 0($pop31)
-; CHECK-NEXT: i8x16.replace_lane $push72=, $pop71, 8, $pop32
-; CHECK-NEXT: i8x16.extract_lane_u $push25=, $1, 9
-; CHECK-NEXT: i32.const $push88=, 15
-; CHECK-NEXT: i32.and $push26=, $pop25, $pop88
+; CHECK-NEXT: i8x16.extract_lane_u $push25=, $1, 6
+; CHECK-NEXT: i32.const $push73=, 15
+; CHECK-NEXT: i32.and $push26=, $pop25, $pop73
; CHECK-NEXT: i32.or $push27=, $2, $pop26
-; CHECK-NEXT: i32.load8_u $push28=, 0($pop27)
-; CHECK-NEXT: i8x16.replace_lane $push73=, $pop72, 9, $pop28
-; CHECK-NEXT: i8x16.extract_lane_u $push21=, $1, 10
-; CHECK-NEXT: i32.const $push87=, 15
-; CHECK-NEXT: i32.and $push22=, $pop21, $pop87
+; CHECK-NEXT: i8x16.extract_lane_u $push21=, $1, 5
+; CHECK-NEXT: i32.const $push72=, 15
+; CHECK-NEXT: i32.and $push22=, $pop21, $pop72
; CHECK-NEXT: i32.or $push23=, $2, $pop22
-; CHECK-NEXT: i32.load8_u $push24=, 0($pop23)
-; CHECK-NEXT: i8x16.replace_lane $push74=, $pop73, 10, $pop24
-; CHECK-NEXT: i8x16.extract_lane_u $push17=, $1, 11
-; CHECK-NEXT: i32.const $push86=, 15
-; CHECK-NEXT: i32.and $push18=, $pop17, $pop86
+; CHECK-NEXT: i8x16.extract_lane_u $push17=, $1, 4
+; CHECK-NEXT: i32.const $push71=, 15
+; CHECK-NEXT: i32.and $push18=, $pop17, $pop71
; CHECK-NEXT: i32.or $push19=, $2, $pop18
-; CHECK-NEXT: i32.load8_u $push20=, 0($pop19)
-; CHECK-NEXT: i8x16.replace_lane $push75=, $pop74, 11, $pop20
-; CHECK-NEXT: i8x16.extract_lane_u $push13=, $1, 12
-; CHECK-NEXT: i32.const $push85=, 15
-; CHECK-NEXT: i32.and $push14=, $pop13, $pop85
+; CHECK-NEXT: i8x16.extract_lane_u $push13=, $1, 3
+; CHECK-NEXT: i32.const $push70=, 15
+; CHECK-NEXT: i32.and $push14=, $pop13, $pop70
; CHECK-NEXT: i32.or $push15=, $2, $pop14
-; CHECK-NEXT: i32.load8_u $push16=, 0($pop15)
-; CHECK-NEXT: i8x16.replace_lane $push76=, $pop75, 12, $pop16
-; CHECK-NEXT: i8x16.extract_lane_u $push9=, $1, 13
-; CHECK-NEXT: i32.const $push84=, 15
-; CHECK-NEXT: i32.and $push10=, $pop9, $pop84
+; CHECK-NEXT: i8x16.extract_lane_u $push9=, $1, 2
+; CHECK-NEXT: i32.const $push69=, 15
+; CHECK-NEXT: i32.and $push10=, $pop9, $pop69
; CHECK-NEXT: i32.or $push11=, $2, $pop10
-; CHECK-NEXT: i32.load8_u $push12=, 0($pop11)
-; CHECK-NEXT: i8x16.replace_lane $push77=, $pop76, 13, $pop12
-; CHECK-NEXT: i8x16.extract_lane_u $push5=, $1, 14
-; CHECK-NEXT: i32.const $push83=, 15
-; CHECK-NEXT: i32.and $push6=, $pop5, $pop83
-; CHECK-NEXT: i32.or $push7=, $2, $pop6
-; CHECK-NEXT: i32.load8_u $push8=, 0($pop7)
-; CHECK-NEXT: i8x16.replace_lane $push78=, $pop77, 14, $pop8
-; CHECK-NEXT: i8x16.extract_lane_u $push0=, $1, 15
-; CHECK-NEXT: i32.const $push82=, 15
-; CHECK-NEXT: i32.and $push2=, $pop0, $pop82
+; CHECK-NEXT: i8x16.extract_lane_u $push0=, $1, 1
+; CHECK-NEXT: i32.const $push68=, 15
+; CHECK-NEXT: i32.and $push2=, $pop0, $pop68
; CHECK-NEXT: i32.or $push3=, $2, $pop2
-; CHECK-NEXT: i32.load8_u $push4=, 0($pop3)
-; CHECK-NEXT: i8x16.replace_lane $push79=, $pop78, 15, $pop4
-; CHECK-NEXT: return $pop79
+; CHECK-NEXT: i8x16.extract_lane_u $push4=, $1, 0
+; CHECK-NEXT: i32.const $push67=, 15
+; CHECK-NEXT: i32.and $push5=, $pop4, $pop67
+; CHECK-NEXT: i32.or $push6=, $2, $pop5
+; CHECK-NEXT: v128.load8_splat $push7=, 0($pop6)
+; CHECK-NEXT: v128.load8_lane $push8=, 0($pop3), $pop7, 1
+; CHECK-NEXT: v128.load8_lane $push12=, 0($pop11), $pop8, 2
+; CHECK-NEXT: v128.load8_lane $push16=, 0($pop15), $pop12, 3
+; CHECK-NEXT: v128.load8_lane $push20=, 0($pop19), $pop16, 4
+; CHECK-NEXT: v128.load8_lane $push24=, 0($pop23), $pop20, 5
+; CHECK-NEXT: v128.load8_lane $push28=, 0($pop27), $pop24, 6
+; CHECK-NEXT: v128.load8_lane $push32=, 0($pop31), $pop28, 7
+; CHECK-NEXT: v128.load8_lane $push36=, 0($pop35), $pop32, 8
+; CHECK-NEXT: v128.load8_lane $push40=, 0($pop39), $pop36, 9
+; CHECK-NEXT: v128.load8_lane $push44=, 0($pop43), $pop40, 10
+; CHECK-NEXT: v128.load8_lane $push48=, 0($pop47), $pop44, 11
+; CHECK-NEXT: v128.load8_lane $push52=, 0($pop51), $pop48, 12
+; CHECK-NEXT: v128.load8_lane $push56=, 0($pop55), $pop52, 13
+; CHECK-NEXT: v128.load8_lane $push60=, 0($pop59), $pop56, 14
+; CHECK-NEXT: v128.load8_lane $push64=, 0($pop63), $pop60, 15
+; CHECK-NEXT: return $pop64
%m0 = extractelement <16 x i8> %mask, i32 0
%s0 = extractelement <16 x i8> %src, i8 %m0
%v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
@@ -423,7 +408,7 @@ define <4 x float> @undef_const_insert_f32x4() {
; CHECK-LABEL: undef_const_insert_f32x4:
; CHECK: .functype undef_const_insert_f32x4 () -> (v128)
; CHECK-NEXT: # %bb.0:
-; CHECK-NEXT: v128.const $push0=, 0x0p0, 0x1.5p5, 0x0p0, 0x0p0
+; CHECK-NEXT: v128.const $push0=, 0x1.5p5, 0x1.5p5, 0x1.5p5, 0x1.5p5
; CHECK-NEXT: return $pop0
%v = insertelement <4 x float> undef, float 42., i32 1
ret <4 x float> %v
diff --git a/llvm/test/CodeGen/WebAssembly/simd-load-splat.ll b/llvm/test/CodeGen/WebAssembly/simd-load-splat.ll
index 1a2aaa1f97ecc..c7ef795cf3c0e 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-load-splat.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-load-splat.ll
@@ -7,10 +7,11 @@ target triple = "wasm32-unknown-unknown"
; CHECK-LABEL: load_splat:
; CHECK-NEXT: .functype load_splat (i32, i32) -> (i32)
-; CHECK-NEXT: i32.load8_u $[[E:[0-9]+]]=, 0($0){{$}}
-; CHECK-NEXT: v128.load8_splat $push[[V:[0-9]+]]=, 0($0){{$}}
+; CHECK-NEXT: i32.load8_u $push[[E:[0-9]+]]=, 0($0){{$}}
+; CHECK-NEXT: local.tee $push[[T:[0-9]+]]=, $[[R:[0-9]+]]=, $pop[[E]]{{$}}
+; CHECK-NEXT: i8x16.splat $push[[V:[0-9]+]]=, $pop[[T]]{{$}}
; CHECK-NEXT: v128.store 0($1), $pop[[V]]{{$}}
-; CHECK-NEXT: return $[[E]]{{$}}
+; CHECK-NEXT: return $[[R]]{{$}}
define i8 @load_splat(ptr %p, ptr %out) {
%e = load i8, ptr %p
%v1 = insertelement <16 x i8> undef, i8 %e, i32 0
diff --git a/llvm/test/CodeGen/WebAssembly/simd-pr51605.ll b/llvm/test/CodeGen/WebAssembly/simd-pr51605.ll
index ba89f70824bb4..2c2002d6fa83b 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-pr51605.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-pr51605.ll
@@ -13,11 +13,9 @@ define <4 x i8> @test_i8(<4 x i8> %b) {
; CHECK-LABEL: test_i8:
; CHECK: .functype test_i8 (v128) -> (v128)
; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: v128.const 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: v128.const -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-; CHECK-NEXT: v128.xor
-; CHECK-NEXT: v128.const 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-; CHECK-NEXT: v128.and
+; CHECK-NEXT: v128.andnot
; CHECK-NEXT: # fallthrough-return
%c = and <4 x i8> %b, <i8 1, i8 1, i8 1, i8 1>
%d = xor <4 x i8> %c, <i8 1, i8 1, i8 1, i8 1>
@@ -28,11 +26,9 @@ define <4 x i16> @test_i16(<4 x i16> %b) {
; CHECK-LABEL: test_i16:
; CHECK: .functype test_i16 (v128) -> (v128)
; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: v128.const 1, 1, 1, 1, 1, 1, 1, 1
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: v128.const -1, -1, -1, -1, 0, 0, 0, 0
-; CHECK-NEXT: v128.xor
-; CHECK-NEXT: v128.const 1, 1, 1, 1, 0, 0, 0, 0
-; CHECK-NEXT: v128.and
+; CHECK-NEXT: v128.andnot
; CHECK-NEXT: # fallthrough-return
%c = and <4 x i16> %b, <i16 1, i16 1, i16 1, i16 1>
%d = xor <4 x i16> %c, <i16 1, i16 1, i16 1, i16 1>
diff --git a/llvm/test/CodeGen/WebAssembly/simd-shift-complex-splats.ll b/llvm/test/CodeGen/WebAssembly/simd-shift-complex-splats.ll
index 88494c0c6aff2..2b08f1c23b59a 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-shift-complex-splats.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-shift-complex-splats.ll
@@ -47,11 +47,10 @@ define <16 x i8> @shl_abs(<16 x i8> %v, i8 %a) {
; CHECK-NEXT: i8x16.splat $push1=, $1
; CHECK-NEXT: i8x16.splat $push0=, $2
; CHECK-NEXT: i8x16.add $push2=, $pop1, $pop0
-; CHECK-NEXT: i8x16.shuffle $push3=, $pop2, $0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-; CHECK-NEXT: i8x16.abs $push4=, $pop3
-; CHECK-NEXT: i8x16.extract_lane_u $push5=, $pop4, 0
-; CHECK-NEXT: i8x16.shl $push6=, $0, $pop5
-; CHECK-NEXT: return $pop6
+; CHECK-NEXT: i8x16.abs $push3=, $pop2
+; CHECK-NEXT: i8x16.extract_lane_u $push4=, $pop3, 0
+; CHECK-NEXT: i8x16.shl $push5=, $0, $pop4
+; CHECK-NEXT: return $pop5
define <16 x i8> @shl_abs_add(<16 x i8> %v, i8 %a, i8 %b) {
%t1 = insertelement <16 x i8> undef, i8 %a, i32 0
%va = shufflevector <16 x i8> %t1, <16 x i8> undef, <16 x i32> zeroinitializer
More information about the llvm-commits
mailing list