[llvm] 3378657 - [WebAssembly] Codegen for extmul SIMD instructions
Thomas Lively via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 27 08:41:37 PDT 2021
Author: Thomas Lively
Date: 2021-07-27T08:41:30-07:00
New Revision: 33786576fd3a7f79a6627a51386b173940bab0e9
URL: https://github.com/llvm/llvm-project/commit/33786576fd3a7f79a6627a51386b173940bab0e9
DIFF: https://github.com/llvm/llvm-project/commit/33786576fd3a7f79a6627a51386b173940bab0e9.diff
LOG: [WebAssembly] Codegen for extmul SIMD instructions
Replace the clang builtins and LLVM intrinsics for the SIMD extmul instructions
with normal codegen patterns.
Differential Revision: https://reviews.llvm.org/D106724
Added: (none)
Modified:
clang/include/clang/Basic/BuiltinsWebAssembly.def
clang/lib/CodeGen/CGBuiltin.cpp
clang/lib/Headers/wasm_simd128.h
clang/test/CodeGen/builtins-wasm.c
clang/test/Headers/wasm.c
llvm/include/llvm/IR/IntrinsicsWebAssembly.td
llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
llvm/test/CodeGen/WebAssembly/simd-arith.ll
llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll
Removed: (none)
################################################################################
diff --git a/clang/include/clang/Basic/BuiltinsWebAssembly.def b/clang/include/clang/Basic/BuiltinsWebAssembly.def
index aff14b7dbf4d8..04ec45aa3b747 100644
--- a/clang/include/clang/Basic/BuiltinsWebAssembly.def
+++ b/clang/include/clang/Basic/BuiltinsWebAssembly.def
@@ -103,21 +103,6 @@ TARGET_BUILTIN(__builtin_wasm_popcnt_i8x16, "V16ScV16Sc", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_q15mulr_sat_s_i16x8, "V8sV8sV8s", "nc", "simd128")
-TARGET_BUILTIN(__builtin_wasm_extmul_low_i8x16_s_i16x8, "V8sV16ScV16Sc", "nc", "simd128")
-TARGET_BUILTIN(__builtin_wasm_extmul_high_i8x16_s_i16x8, "V8sV16ScV16Sc", "nc", "simd128")
-TARGET_BUILTIN(__builtin_wasm_extmul_low_i8x16_u_i16x8, "V8UsV16UcV16Uc", "nc", "simd128")
-TARGET_BUILTIN(__builtin_wasm_extmul_high_i8x16_u_i16x8, "V8UsV16UcV16Uc", "nc", "simd128")
-
-TARGET_BUILTIN(__builtin_wasm_extmul_low_i16x8_s_i32x4, "V4iV8sV8s", "nc", "simd128")
-TARGET_BUILTIN(__builtin_wasm_extmul_high_i16x8_s_i32x4, "V4iV8sV8s", "nc", "simd128")
-TARGET_BUILTIN(__builtin_wasm_extmul_low_i16x8_u_i32x4, "V4UiV8UsV8Us", "nc", "simd128")
-TARGET_BUILTIN(__builtin_wasm_extmul_high_i16x8_u_i32x4, "V4UiV8UsV8Us", "nc", "simd128")
-
-TARGET_BUILTIN(__builtin_wasm_extmul_low_i32x4_s_i64x2, "V2LLiV4iV4i", "nc", "simd128")
-TARGET_BUILTIN(__builtin_wasm_extmul_high_i32x4_s_i64x2, "V2LLiV4iV4i", "nc", "simd128")
-TARGET_BUILTIN(__builtin_wasm_extmul_low_i32x4_u_i64x2, "V2ULLiV4UiV4Ui", "nc", "simd128")
-TARGET_BUILTIN(__builtin_wasm_extmul_high_i32x4_u_i64x2, "V2ULLiV4UiV4Ui", "nc", "simd128")
-
TARGET_BUILTIN(__builtin_wasm_extadd_pairwise_i8x16_s_i16x8, "V8sV16Sc", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_extadd_pairwise_i8x16_u_i16x8, "V8UsV16Uc", "nc", "simd128")
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index f6a9a10c3f7fd..d9b2a5fe16bec 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -17864,49 +17864,6 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_q15mulr_sat_signed);
return Builder.CreateCall(Callee, {LHS, RHS});
}
- case WebAssembly::BI__builtin_wasm_extmul_low_i8x16_s_i16x8:
- case WebAssembly::BI__builtin_wasm_extmul_high_i8x16_s_i16x8:
- case WebAssembly::BI__builtin_wasm_extmul_low_i8x16_u_i16x8:
- case WebAssembly::BI__builtin_wasm_extmul_high_i8x16_u_i16x8:
- case WebAssembly::BI__builtin_wasm_extmul_low_i16x8_s_i32x4:
- case WebAssembly::BI__builtin_wasm_extmul_high_i16x8_s_i32x4:
- case WebAssembly::BI__builtin_wasm_extmul_low_i16x8_u_i32x4:
- case WebAssembly::BI__builtin_wasm_extmul_high_i16x8_u_i32x4:
- case WebAssembly::BI__builtin_wasm_extmul_low_i32x4_s_i64x2:
- case WebAssembly::BI__builtin_wasm_extmul_high_i32x4_s_i64x2:
- case WebAssembly::BI__builtin_wasm_extmul_low_i32x4_u_i64x2:
- case WebAssembly::BI__builtin_wasm_extmul_high_i32x4_u_i64x2: {
- Value *LHS = EmitScalarExpr(E->getArg(0));
- Value *RHS = EmitScalarExpr(E->getArg(1));
- unsigned IntNo;
- switch (BuiltinID) {
- case WebAssembly::BI__builtin_wasm_extmul_low_i8x16_s_i16x8:
- case WebAssembly::BI__builtin_wasm_extmul_low_i16x8_s_i32x4:
- case WebAssembly::BI__builtin_wasm_extmul_low_i32x4_s_i64x2:
- IntNo = Intrinsic::wasm_extmul_low_signed;
- break;
- case WebAssembly::BI__builtin_wasm_extmul_low_i8x16_u_i16x8:
- case WebAssembly::BI__builtin_wasm_extmul_low_i16x8_u_i32x4:
- case WebAssembly::BI__builtin_wasm_extmul_low_i32x4_u_i64x2:
- IntNo = Intrinsic::wasm_extmul_low_unsigned;
- break;
- case WebAssembly::BI__builtin_wasm_extmul_high_i8x16_s_i16x8:
- case WebAssembly::BI__builtin_wasm_extmul_high_i16x8_s_i32x4:
- case WebAssembly::BI__builtin_wasm_extmul_high_i32x4_s_i64x2:
- IntNo = Intrinsic::wasm_extmul_high_signed;
- break;
- case WebAssembly::BI__builtin_wasm_extmul_high_i8x16_u_i16x8:
- case WebAssembly::BI__builtin_wasm_extmul_high_i16x8_u_i32x4:
- case WebAssembly::BI__builtin_wasm_extmul_high_i32x4_u_i64x2:
- IntNo = Intrinsic::wasm_extmul_high_unsigned;
- break;
- default:
- llvm_unreachable("unexptected builtin ID");
- }
-
- Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
- return Builder.CreateCall(Callee, {LHS, RHS});
- }
case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_s_i16x8:
case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_u_i16x8:
case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_s_i32x4:
diff --git a/clang/lib/Headers/wasm_simd128.h b/clang/lib/Headers/wasm_simd128.h
index 309e39e801861..712fa03780986 100644
--- a/clang/lib/Headers/wasm_simd128.h
+++ b/clang/lib/Headers/wasm_simd128.h
@@ -1455,74 +1455,74 @@ wasm_u32x4_extadd_pairwise_u16x8(v128_t __a) {
static __inline__ v128_t __DEFAULT_FN_ATTRS
wasm_i16x8_extmul_low_i8x16(v128_t __a, v128_t __b) {
- return (v128_t)__builtin_wasm_extmul_low_i8x16_s_i16x8((__i8x16)__a,
- (__i8x16)__b);
+ return (v128_t)((__i16x8)wasm_i16x8_extend_low_i8x16(__a) *
+ (__i16x8)wasm_i16x8_extend_low_i8x16(__b));
}
static __inline__ v128_t __DEFAULT_FN_ATTRS
wasm_i16x8_extmul_high_i8x16(v128_t __a, v128_t __b) {
- return (v128_t)__builtin_wasm_extmul_high_i8x16_s_i16x8((__i8x16)__a,
- (__i8x16)__b);
+ return (v128_t)((__i16x8)wasm_i16x8_extend_high_i8x16(__a) *
+ (__i16x8)wasm_i16x8_extend_high_i8x16(__b));
}
static __inline__ v128_t __DEFAULT_FN_ATTRS
wasm_u16x8_extmul_low_u8x16(v128_t __a, v128_t __b) {
- return (v128_t)__builtin_wasm_extmul_low_i8x16_u_i16x8((__u8x16)__a,
- (__u8x16)__b);
+ return (v128_t)((__u16x8)wasm_u16x8_extend_low_u8x16(__a) *
+ (__u16x8)wasm_u16x8_extend_low_u8x16(__b));
}
static __inline__ v128_t __DEFAULT_FN_ATTRS
wasm_u16x8_extmul_high_u8x16(v128_t __a, v128_t __b) {
- return (v128_t)__builtin_wasm_extmul_high_i8x16_u_i16x8((__u8x16)__a,
- (__u8x16)__b);
+ return (v128_t)((__u16x8)wasm_u16x8_extend_high_u8x16(__a) *
+ (__u16x8)wasm_u16x8_extend_high_u8x16(__b));
}
static __inline__ v128_t __DEFAULT_FN_ATTRS
wasm_i32x4_extmul_low_i16x8(v128_t __a, v128_t __b) {
- return (v128_t)__builtin_wasm_extmul_low_i16x8_s_i32x4((__i16x8)__a,
- (__i16x8)__b);
+ return (v128_t)((__i32x4)wasm_i32x4_extend_low_i16x8(__a) *
+ (__i32x4)wasm_i32x4_extend_low_i16x8(__b));
}
static __inline__ v128_t __DEFAULT_FN_ATTRS
wasm_i32x4_extmul_high_i16x8(v128_t __a, v128_t __b) {
- return (v128_t)__builtin_wasm_extmul_high_i16x8_s_i32x4((__i16x8)__a,
- (__i16x8)__b);
+ return (v128_t)((__i32x4)wasm_i32x4_extend_high_i16x8(__a) *
+ (__i32x4)wasm_i32x4_extend_high_i16x8(__b));
}
static __inline__ v128_t __DEFAULT_FN_ATTRS
wasm_u32x4_extmul_low_u16x8(v128_t __a, v128_t __b) {
- return (v128_t)__builtin_wasm_extmul_low_i16x8_u_i32x4((__u16x8)__a,
- (__u16x8)__b);
+ return (v128_t)((__u32x4)wasm_u32x4_extend_low_u16x8(__a) *
+ (__u32x4)wasm_u32x4_extend_low_u16x8(__b));
}
static __inline__ v128_t __DEFAULT_FN_ATTRS
wasm_u32x4_extmul_high_u16x8(v128_t __a, v128_t __b) {
- return (v128_t)__builtin_wasm_extmul_high_i16x8_u_i32x4((__u16x8)__a,
- (__u16x8)__b);
+ return (v128_t)((__u32x4)wasm_u32x4_extend_high_u16x8(__a) *
+ (__u32x4)wasm_u32x4_extend_high_u16x8(__b));
}
static __inline__ v128_t __DEFAULT_FN_ATTRS
wasm_i64x2_extmul_low_i32x4(v128_t __a, v128_t __b) {
- return (v128_t)__builtin_wasm_extmul_low_i32x4_s_i64x2((__i32x4)__a,
- (__i32x4)__b);
+ return (v128_t)((__i64x2)wasm_i64x2_extend_low_i32x4(__a) *
+ (__i64x2)wasm_i64x2_extend_low_i32x4(__b));
}
static __inline__ v128_t __DEFAULT_FN_ATTRS
wasm_i64x2_extmul_high_i32x4(v128_t __a, v128_t __b) {
- return (v128_t)__builtin_wasm_extmul_high_i32x4_s_i64x2((__i32x4)__a,
- (__i32x4)__b);
+ return (v128_t)((__i64x2)wasm_i64x2_extend_high_i32x4(__a) *
+ (__i64x2)wasm_i64x2_extend_high_i32x4(__b));
}
static __inline__ v128_t __DEFAULT_FN_ATTRS
wasm_u64x2_extmul_low_u32x4(v128_t __a, v128_t __b) {
- return (v128_t)__builtin_wasm_extmul_low_i32x4_u_i64x2((__u32x4)__a,
- (__u32x4)__b);
+ return (v128_t)((__u64x2)wasm_u64x2_extend_low_u32x4(__a) *
+ (__u64x2)wasm_u64x2_extend_low_u32x4(__b));
}
static __inline__ v128_t __DEFAULT_FN_ATTRS
wasm_u64x2_extmul_high_u32x4(v128_t __a, v128_t __b) {
- return (v128_t)__builtin_wasm_extmul_high_i32x4_u_i64x2((__u32x4)__a,
- (__u32x4)__b);
+ return (v128_t)((__u64x2)wasm_u64x2_extend_high_u32x4(__a) *
+ (__u64x2)wasm_u64x2_extend_high_u32x4(__b));
}
static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_q15mulr_sat(v128_t __a,
diff --git a/clang/test/CodeGen/builtins-wasm.c b/clang/test/CodeGen/builtins-wasm.c
index 2513f98f30d09..ac33ce5716e6d 100644
--- a/clang/test/CodeGen/builtins-wasm.c
+++ b/clang/test/CodeGen/builtins-wasm.c
@@ -379,90 +379,6 @@ i16x8 q15mulr_sat_s_i16x8(i16x8 x, i16x8 y) {
// WEBASSEMBLY-NEXT: ret
}
-i16x8 extmul_low_i8x16_s_i16x8(i8x16 x, i8x16 y) {
- return __builtin_wasm_extmul_low_i8x16_s_i16x8(x, y);
- // WEBASSEMBLY: call <8 x i16> @llvm.wasm.extmul.low.signed.v8i16(
- // WEBASSEMBLY-SAME: <16 x i8> %x, <16 x i8> %y)
- // WEBASSEMBLY-NEXT: ret
-}
-
-i16x8 extmul_high_i8x16_s_i16x8(i8x16 x, i8x16 y) {
- return __builtin_wasm_extmul_high_i8x16_s_i16x8(x, y);
- // WEBASSEMBLY: call <8 x i16> @llvm.wasm.extmul.high.signed.v8i16(
- // WEBASSEMBLY-SAME: <16 x i8> %x, <16 x i8> %y)
- // WEBASSEMBLY-NEXT: ret
-}
-
-u16x8 extmul_low_i8x16_u_i16x8(u8x16 x, u8x16 y) {
- return __builtin_wasm_extmul_low_i8x16_u_i16x8(x, y);
- // WEBASSEMBLY: call <8 x i16> @llvm.wasm.extmul.low.unsigned.v8i16(
- // WEBASSEMBLY-SAME: <16 x i8> %x, <16 x i8> %y)
- // WEBASSEMBLY-NEXT: ret
-}
-
-u16x8 extmul_high_i8x16_u_i16x8(u8x16 x, u8x16 y) {
- return __builtin_wasm_extmul_high_i8x16_u_i16x8(x, y);
- // WEBASSEMBLY: call <8 x i16> @llvm.wasm.extmul.high.unsigned.v8i16(
- // WEBASSEMBLY-SAME: <16 x i8> %x, <16 x i8> %y)
- // WEBASSEMBLY-NEXT: ret
-}
-
-i32x4 extmul_low_i16x8_s_i32x4(i16x8 x, i16x8 y) {
- return __builtin_wasm_extmul_low_i16x8_s_i32x4(x, y);
- // WEBASSEMBLY: call <4 x i32> @llvm.wasm.extmul.low.signed.v4i32(
- // WEBASSEMBLY-SAME: <8 x i16> %x, <8 x i16> %y)
- // WEBASSEMBLY-NEXT: ret
-}
-
-i32x4 extmul_high_i16x8_s_i32x4(i16x8 x, i16x8 y) {
- return __builtin_wasm_extmul_high_i16x8_s_i32x4(x, y);
- // WEBASSEMBLY: call <4 x i32> @llvm.wasm.extmul.high.signed.v4i32(
- // WEBASSEMBLY-SAME: <8 x i16> %x, <8 x i16> %y)
- // WEBASSEMBLY-NEXT: ret
-}
-
-u32x4 extmul_low_i16x8_u_i32x4(u16x8 x, u16x8 y) {
- return __builtin_wasm_extmul_low_i16x8_u_i32x4(x, y);
- // WEBASSEMBLY: call <4 x i32> @llvm.wasm.extmul.low.unsigned.v4i32(
- // WEBASSEMBLY-SAME: <8 x i16> %x, <8 x i16> %y)
- // WEBASSEMBLY-NEXT: ret
-}
-
-u32x4 extmul_high_i16x8_u_i32x4(u16x8 x, u16x8 y) {
- return __builtin_wasm_extmul_high_i16x8_u_i32x4(x, y);
- // WEBASSEMBLY: call <4 x i32> @llvm.wasm.extmul.high.unsigned.v4i32(
- // WEBASSEMBLY-SAME: <8 x i16> %x, <8 x i16> %y)
- // WEBASSEMBLY-NEXT: ret
-}
-
-i64x2 extmul_low_i32x4_s_i64x2(i32x4 x, i32x4 y) {
- return __builtin_wasm_extmul_low_i32x4_s_i64x2(x, y);
- // WEBASSEMBLY: call <2 x i64> @llvm.wasm.extmul.low.signed.v2i64(
- // WEBASSEMBLY-SAME: <4 x i32> %x, <4 x i32> %y)
- // WEBASSEMBLY-NEXT: ret
-}
-
-i64x2 extmul_high_i32x4_s_i64x2(i32x4 x, i32x4 y) {
- return __builtin_wasm_extmul_high_i32x4_s_i64x2(x, y);
- // WEBASSEMBLY: call <2 x i64> @llvm.wasm.extmul.high.signed.v2i64(
- // WEBASSEMBLY-SAME: <4 x i32> %x, <4 x i32> %y)
- // WEBASSEMBLY-NEXT: ret
-}
-
-u64x2 extmul_low_i32x4_u_i64x2(u32x4 x, u32x4 y) {
- return __builtin_wasm_extmul_low_i32x4_u_i64x2(x, y);
- // WEBASSEMBLY: call <2 x i64> @llvm.wasm.extmul.low.unsigned.v2i64(
- // WEBASSEMBLY-SAME: <4 x i32> %x, <4 x i32> %y)
- // WEBASSEMBLY-NEXT: ret
-}
-
-u64x2 extmul_high_i32x4_u_i64x2(u32x4 x, u32x4 y) {
- return __builtin_wasm_extmul_high_i32x4_u_i64x2(x, y);
- // WEBASSEMBLY: call <2 x i64> @llvm.wasm.extmul.high.unsigned.v2i64(
- // WEBASSEMBLY-SAME: <4 x i32> %x, <4 x i32> %y)
- // WEBASSEMBLY-NEXT: ret
-}
-
i16x8 extadd_pairwise_i8x16_s_i16x8(i8x16 v) {
return __builtin_wasm_extadd_pairwise_i8x16_s_i16x8(v);
// WEBASSEMBLY: call <8 x i16> @llvm.wasm.extadd.pairwise.signed.v8i16(
diff --git a/clang/test/Headers/wasm.c b/clang/test/Headers/wasm.c
index 4552d14c1ebe5..f51f005974f23 100644
--- a/clang/test/Headers/wasm.c
+++ b/clang/test/Headers/wasm.c
@@ -2781,10 +2781,14 @@ v128_t test_u32x4_extadd_pairwise_u16x8(v128_t a) {
// CHECK-LABEL: @test_i16x8_extmul_low_i8x16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT: [[VECINIT14_I_I:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: [[CONV_I_I:%.*]] = sext <8 x i8> [[VECINIT14_I_I]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.extmul.low.signed.v8i16(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR6]]
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
-// CHECK-NEXT: ret <4 x i32> [[TMP3]]
+// CHECK-NEXT: [[VECINIT14_I2_I:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: [[CONV_I3_I:%.*]] = sext <8 x i8> [[VECINIT14_I2_I]] to <8 x i16>
+// CHECK-NEXT: [[MUL_I:%.*]] = mul nsw <8 x i16> [[CONV_I3_I]], [[CONV_I_I]]
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[MUL_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t test_i16x8_extmul_low_i8x16(v128_t a, v128_t b) {
return wasm_i16x8_extmul_low_i8x16(a, b);
@@ -2793,10 +2797,14 @@ v128_t test_i16x8_extmul_low_i8x16(v128_t a, v128_t b) {
// CHECK-LABEL: @test_i16x8_extmul_high_i8x16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT: [[VECINIT14_I_I:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: [[CONV_I_I:%.*]] = sext <8 x i8> [[VECINIT14_I_I]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.extmul.high.signed.v8i16(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR6]]
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
-// CHECK-NEXT: ret <4 x i32> [[TMP3]]
+// CHECK-NEXT: [[VECINIT14_I2_I:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: [[CONV_I3_I:%.*]] = sext <8 x i8> [[VECINIT14_I2_I]] to <8 x i16>
+// CHECK-NEXT: [[MUL_I:%.*]] = mul nsw <8 x i16> [[CONV_I3_I]], [[CONV_I_I]]
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[MUL_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t test_i16x8_extmul_high_i8x16(v128_t a, v128_t b) {
return wasm_i16x8_extmul_high_i8x16(a, b);
@@ -2805,10 +2813,14 @@ v128_t test_i16x8_extmul_high_i8x16(v128_t a, v128_t b) {
// CHECK-LABEL: @test_u16x8_extmul_low_u8x16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT: [[VECINIT14_I_I:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: [[CONV_I_I:%.*]] = zext <8 x i8> [[VECINIT14_I_I]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.extmul.low.unsigned.v8i16(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR6]]
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
-// CHECK-NEXT: ret <4 x i32> [[TMP3]]
+// CHECK-NEXT: [[VECINIT14_I2_I:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: [[CONV_I3_I:%.*]] = zext <8 x i8> [[VECINIT14_I2_I]] to <8 x i16>
+// CHECK-NEXT: [[MUL_I:%.*]] = mul nuw <8 x i16> [[CONV_I3_I]], [[CONV_I_I]]
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[MUL_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t test_u16x8_extmul_low_u8x16(v128_t a, v128_t b) {
return wasm_u16x8_extmul_low_u8x16(a, b);
@@ -2817,10 +2829,14 @@ v128_t test_u16x8_extmul_low_u8x16(v128_t a, v128_t b) {
// CHECK-LABEL: @test_u16x8_extmul_high_u8x16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT: [[VECINIT14_I_I:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: [[CONV_I_I:%.*]] = zext <8 x i8> [[VECINIT14_I_I]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.extmul.high.unsigned.v8i16(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR6]]
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
-// CHECK-NEXT: ret <4 x i32> [[TMP3]]
+// CHECK-NEXT: [[VECINIT14_I2_I:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: [[CONV_I3_I:%.*]] = zext <8 x i8> [[VECINIT14_I2_I]] to <8 x i16>
+// CHECK-NEXT: [[MUL_I:%.*]] = mul nuw <8 x i16> [[CONV_I3_I]], [[CONV_I_I]]
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[MUL_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t test_u16x8_extmul_high_u8x16(v128_t a, v128_t b) {
return wasm_u16x8_extmul_high_u8x16(a, b);
@@ -2829,9 +2845,13 @@ v128_t test_u16x8_extmul_high_u8x16(v128_t a, v128_t b) {
// CHECK-LABEL: @test_i32x4_extmul_low_i16x8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT: [[VECINIT6_I_I:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-NEXT: [[CONV_I_I:%.*]] = sext <4 x i16> [[VECINIT6_I_I]] to <4 x i32>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.wasm.extmul.low.signed.v4i32(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR6]]
-// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+// CHECK-NEXT: [[VECINIT6_I2_I:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-NEXT: [[CONV_I3_I:%.*]] = sext <4 x i16> [[VECINIT6_I2_I]] to <4 x i32>
+// CHECK-NEXT: [[MUL_I:%.*]] = mul nsw <4 x i32> [[CONV_I3_I]], [[CONV_I_I]]
+// CHECK-NEXT: ret <4 x i32> [[MUL_I]]
//
v128_t test_i32x4_extmul_low_i16x8(v128_t a, v128_t b) {
return wasm_i32x4_extmul_low_i16x8(a, b);
@@ -2840,9 +2860,13 @@ v128_t test_i32x4_extmul_low_i16x8(v128_t a, v128_t b) {
// CHECK-LABEL: @test_i32x4_extmul_high_i16x8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT: [[VECINIT6_I_I:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: [[CONV_I_I:%.*]] = sext <4 x i16> [[VECINIT6_I_I]] to <4 x i32>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.wasm.extmul.high.signed.v4i32(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR6]]
-// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+// CHECK-NEXT: [[VECINIT6_I2_I:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: [[CONV_I3_I:%.*]] = sext <4 x i16> [[VECINIT6_I2_I]] to <4 x i32>
+// CHECK-NEXT: [[MUL_I:%.*]] = mul nsw <4 x i32> [[CONV_I3_I]], [[CONV_I_I]]
+// CHECK-NEXT: ret <4 x i32> [[MUL_I]]
//
v128_t test_i32x4_extmul_high_i16x8(v128_t a, v128_t b) {
return wasm_i32x4_extmul_high_i16x8(a, b);
@@ -2851,9 +2875,13 @@ v128_t test_i32x4_extmul_high_i16x8(v128_t a, v128_t b) {
// CHECK-LABEL: @test_u32x4_extmul_low_u16x8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT: [[VECINIT6_I_I:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-NEXT: [[CONV_I_I:%.*]] = zext <4 x i16> [[VECINIT6_I_I]] to <4 x i32>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.wasm.extmul.low.unsigned.v4i32(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR6]]
-// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+// CHECK-NEXT: [[VECINIT6_I2_I:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-NEXT: [[CONV_I3_I:%.*]] = zext <4 x i16> [[VECINIT6_I2_I]] to <4 x i32>
+// CHECK-NEXT: [[MUL_I:%.*]] = mul nuw <4 x i32> [[CONV_I3_I]], [[CONV_I_I]]
+// CHECK-NEXT: ret <4 x i32> [[MUL_I]]
//
v128_t test_u32x4_extmul_low_u16x8(v128_t a, v128_t b) {
return wasm_u32x4_extmul_low_u16x8(a, b);
@@ -2862,9 +2890,13 @@ v128_t test_u32x4_extmul_low_u16x8(v128_t a, v128_t b) {
// CHECK-LABEL: @test_u32x4_extmul_high_u16x8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT: [[VECINIT6_I_I:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: [[CONV_I_I:%.*]] = zext <4 x i16> [[VECINIT6_I_I]] to <4 x i32>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.wasm.extmul.high.unsigned.v4i32(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR6]]
-// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+// CHECK-NEXT: [[VECINIT6_I2_I:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: [[CONV_I3_I:%.*]] = zext <4 x i16> [[VECINIT6_I2_I]] to <4 x i32>
+// CHECK-NEXT: [[MUL_I:%.*]] = mul nuw <4 x i32> [[CONV_I3_I]], [[CONV_I_I]]
+// CHECK-NEXT: ret <4 x i32> [[MUL_I]]
//
v128_t test_u32x4_extmul_high_u16x8(v128_t a, v128_t b) {
return wasm_u32x4_extmul_high_u16x8(a, b);
@@ -2872,9 +2904,13 @@ v128_t test_u32x4_extmul_high_u16x8(v128_t a, v128_t b) {
// CHECK-LABEL: @test_i64x2_extmul_low_i32x4(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.wasm.extmul.low.signed.v2i64(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR6]]
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <4 x i32>
-// CHECK-NEXT: ret <4 x i32> [[TMP1]]
+// CHECK-NEXT: [[VECINIT2_I_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+// CHECK-NEXT: [[CONV_I_I:%.*]] = sext <2 x i32> [[VECINIT2_I_I]] to <2 x i64>
+// CHECK-NEXT: [[VECINIT2_I2_I:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+// CHECK-NEXT: [[CONV_I3_I:%.*]] = sext <2 x i32> [[VECINIT2_I2_I]] to <2 x i64>
+// CHECK-NEXT: [[MUL_I:%.*]] = mul nsw <2 x i64> [[CONV_I3_I]], [[CONV_I_I]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[MUL_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
v128_t test_i64x2_extmul_low_i32x4(v128_t a, v128_t b) {
return wasm_i64x2_extmul_low_i32x4(a, b);
@@ -2882,9 +2918,13 @@ v128_t test_i64x2_extmul_low_i32x4(v128_t a, v128_t b) {
// CHECK-LABEL: @test_i64x2_extmul_high_i32x4(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.wasm.extmul.high.signed.v2i64(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR6]]
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <4 x i32>
-// CHECK-NEXT: ret <4 x i32> [[TMP1]]
+// CHECK-NEXT: [[VECINIT2_I_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+// CHECK-NEXT: [[CONV_I_I:%.*]] = sext <2 x i32> [[VECINIT2_I_I]] to <2 x i64>
+// CHECK-NEXT: [[VECINIT2_I2_I:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+// CHECK-NEXT: [[CONV_I3_I:%.*]] = sext <2 x i32> [[VECINIT2_I2_I]] to <2 x i64>
+// CHECK-NEXT: [[MUL_I:%.*]] = mul nsw <2 x i64> [[CONV_I3_I]], [[CONV_I_I]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[MUL_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
v128_t test_i64x2_extmul_high_i32x4(v128_t a, v128_t b) {
return wasm_i64x2_extmul_high_i32x4(a, b);
@@ -2892,9 +2932,13 @@ v128_t test_i64x2_extmul_high_i32x4(v128_t a, v128_t b) {
// CHECK-LABEL: @test_u64x2_extmul_low_u32x4(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.wasm.extmul.low.unsigned.v2i64(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR6]]
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <4 x i32>
-// CHECK-NEXT: ret <4 x i32> [[TMP1]]
+// CHECK-NEXT: [[VECINIT2_I_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+// CHECK-NEXT: [[CONV_I_I:%.*]] = zext <2 x i32> [[VECINIT2_I_I]] to <2 x i64>
+// CHECK-NEXT: [[VECINIT2_I2_I:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+// CHECK-NEXT: [[CONV_I3_I:%.*]] = zext <2 x i32> [[VECINIT2_I2_I]] to <2 x i64>
+// CHECK-NEXT: [[MUL_I:%.*]] = mul nuw <2 x i64> [[CONV_I3_I]], [[CONV_I_I]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[MUL_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
v128_t test_u64x2_extmul_low_u32x4(v128_t a, v128_t b) {
return wasm_u64x2_extmul_low_u32x4(a, b);
@@ -2902,9 +2946,13 @@ v128_t test_u64x2_extmul_low_u32x4(v128_t a, v128_t b) {
// CHECK-LABEL: @test_u64x2_extmul_high_u32x4(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.wasm.extmul.high.unsigned.v2i64(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR6]]
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <4 x i32>
-// CHECK-NEXT: ret <4 x i32> [[TMP1]]
+// CHECK-NEXT: [[VECINIT2_I_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+// CHECK-NEXT: [[CONV_I_I:%.*]] = zext <2 x i32> [[VECINIT2_I_I]] to <2 x i64>
+// CHECK-NEXT: [[VECINIT2_I2_I:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+// CHECK-NEXT: [[CONV_I3_I:%.*]] = zext <2 x i32> [[VECINIT2_I2_I]] to <2 x i64>
+// CHECK-NEXT: [[MUL_I:%.*]] = mul nuw <2 x i64> [[CONV_I3_I]], [[CONV_I_I]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[MUL_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
v128_t test_u64x2_extmul_high_u32x4(v128_t a, v128_t b) {
return wasm_u64x2_extmul_high_u32x4(a, b);
diff --git a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
index 68885c7851a45..11990554037d6 100644
--- a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
+++ b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
@@ -162,23 +162,6 @@ def int_wasm_q15mulr_sat_signed :
[llvm_v8i16_ty, llvm_v8i16_ty],
[IntrNoMem, IntrSpeculatable]>;
-def int_wasm_extmul_low_signed :
- Intrinsic<[llvm_anyvector_ty],
- [LLVMSubdivide2VectorType<0>, LLVMSubdivide2VectorType<0>],
- [IntrNoMem, IntrSpeculatable]>;
-def int_wasm_extmul_high_signed :
- Intrinsic<[llvm_anyvector_ty],
- [LLVMSubdivide2VectorType<0>, LLVMSubdivide2VectorType<0>],
- [IntrNoMem, IntrSpeculatable]>;
-def int_wasm_extmul_low_unsigned :
- Intrinsic<[llvm_anyvector_ty],
- [LLVMSubdivide2VectorType<0>, LLVMSubdivide2VectorType<0>],
- [IntrNoMem, IntrSpeculatable]>;
-def int_wasm_extmul_high_unsigned :
- Intrinsic<[llvm_anyvector_ty],
- [LLVMSubdivide2VectorType<0>, LLVMSubdivide2VectorType<0>],
- [IntrNoMem, IntrSpeculatable]>;
-
def int_wasm_extadd_pairwise_signed :
Intrinsic<[llvm_anyvector_ty],
[LLVMSubdivide2VectorType<0>],
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index f59e186265bd1..6429b46673a61 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -1028,7 +1028,14 @@ defm DOT : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), (outs), (ins),
186>;
// Extending multiplication: extmul_{low,high}_P, extmul_high
-multiclass SIMDExtBinary<Vec vec, Intrinsic node, string name, bits<32> simdop> {
+def extend_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
+def extend_low_s : SDNode<"WebAssemblyISD::EXTEND_LOW_S", extend_t>;
+def extend_high_s : SDNode<"WebAssemblyISD::EXTEND_HIGH_S", extend_t>;
+def extend_low_u : SDNode<"WebAssemblyISD::EXTEND_LOW_U", extend_t>;
+def extend_high_u : SDNode<"WebAssemblyISD::EXTEND_HIGH_U", extend_t>;
+
+multiclass SIMDExtBinary<Vec vec, SDPatternOperator node, string name,
+ bits<32> simdop> {
defm _#vec : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs),
(outs), (ins),
[(set (vec.vt V128:$dst), (node
@@ -1037,32 +1044,41 @@ multiclass SIMDExtBinary<Vec vec, Intrinsic node, string name, bits<32> simdop>
vec.prefix#"."#name, simdop>;
}
+class ExtMulPat<SDNode extend> :
+ PatFrag<(ops node:$lhs, node:$rhs),
+ (mul (extend $lhs), (extend $rhs))> {}
+
+def extmul_low_s : ExtMulPat<extend_low_s>;
+def extmul_high_s : ExtMulPat<extend_high_s>;
+def extmul_low_u : ExtMulPat<extend_low_u>;
+def extmul_high_u : ExtMulPat<extend_high_u>;
+
defm EXTMUL_LOW_S :
- SIMDExtBinary<I16x8, int_wasm_extmul_low_signed, "extmul_low_i8x16_s", 0x9c>;
+ SIMDExtBinary<I16x8, extmul_low_s, "extmul_low_i8x16_s", 0x9c>;
defm EXTMUL_HIGH_S :
- SIMDExtBinary<I16x8, int_wasm_extmul_high_signed, "extmul_high_i8x16_s", 0x9d>;
+ SIMDExtBinary<I16x8, extmul_high_s, "extmul_high_i8x16_s", 0x9d>;
defm EXTMUL_LOW_U :
- SIMDExtBinary<I16x8, int_wasm_extmul_low_unsigned, "extmul_low_i8x16_u", 0x9e>;
+ SIMDExtBinary<I16x8, extmul_low_u, "extmul_low_i8x16_u", 0x9e>;
defm EXTMUL_HIGH_U :
- SIMDExtBinary<I16x8, int_wasm_extmul_high_unsigned, "extmul_high_i8x16_u", 0x9f>;
+ SIMDExtBinary<I16x8, extmul_high_u, "extmul_high_i8x16_u", 0x9f>;
defm EXTMUL_LOW_S :
- SIMDExtBinary<I32x4, int_wasm_extmul_low_signed, "extmul_low_i16x8_s", 0xbc>;
+ SIMDExtBinary<I32x4, extmul_low_s, "extmul_low_i16x8_s", 0xbc>;
defm EXTMUL_HIGH_S :
- SIMDExtBinary<I32x4, int_wasm_extmul_high_signed, "extmul_high_i16x8_s", 0xbd>;
+ SIMDExtBinary<I32x4, extmul_high_s, "extmul_high_i16x8_s", 0xbd>;
defm EXTMUL_LOW_U :
- SIMDExtBinary<I32x4, int_wasm_extmul_low_unsigned, "extmul_low_i16x8_u", 0xbe>;
+ SIMDExtBinary<I32x4, extmul_low_u, "extmul_low_i16x8_u", 0xbe>;
defm EXTMUL_HIGH_U :
- SIMDExtBinary<I32x4, int_wasm_extmul_high_unsigned, "extmul_high_i16x8_u", 0xbf>;
+ SIMDExtBinary<I32x4, extmul_high_u, "extmul_high_i16x8_u", 0xbf>;
defm EXTMUL_LOW_S :
- SIMDExtBinary<I64x2, int_wasm_extmul_low_signed, "extmul_low_i32x4_s", 0xdc>;
+ SIMDExtBinary<I64x2, extmul_low_s, "extmul_low_i32x4_s", 0xdc>;
defm EXTMUL_HIGH_S :
- SIMDExtBinary<I64x2, int_wasm_extmul_high_signed, "extmul_high_i32x4_s", 0xdd>;
+ SIMDExtBinary<I64x2, extmul_high_s, "extmul_high_i32x4_s", 0xdd>;
defm EXTMUL_LOW_U :
- SIMDExtBinary<I64x2, int_wasm_extmul_low_unsigned, "extmul_low_i32x4_u", 0xde>;
+ SIMDExtBinary<I64x2, extmul_low_u, "extmul_low_i32x4_u", 0xde>;
defm EXTMUL_HIGH_U :
- SIMDExtBinary<I64x2, int_wasm_extmul_high_unsigned, "extmul_high_i32x4_u", 0xdf>;
+ SIMDExtBinary<I64x2, extmul_high_u, "extmul_high_i32x4_u", 0xdf>;
//===----------------------------------------------------------------------===//
// Floating-point unary arithmetic
@@ -1191,12 +1207,6 @@ defm "" : SIMDConvert<F64x2, I32x4, convert_low_s, "convert_low_i32x4_s", 0xfe>;
defm "" : SIMDConvert<F64x2, I32x4, convert_low_u, "convert_low_i32x4_u", 0xff>;
// Extending operations
-def extend_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
-def extend_low_s : SDNode<"WebAssemblyISD::EXTEND_LOW_S", extend_t>;
-def extend_high_s : SDNode<"WebAssemblyISD::EXTEND_HIGH_S", extend_t>;
-def extend_low_u : SDNode<"WebAssemblyISD::EXTEND_LOW_U", extend_t>;
-def extend_high_u : SDNode<"WebAssemblyISD::EXTEND_HIGH_U", extend_t>;
-
// TODO: refactor this to be uniform for i64x2 if the numbering is not changed.
multiclass SIMDExtend<Vec vec, bits<32> baseInst> {
defm "" : SIMDConvert<vec, vec.split, extend_low_s,
diff --git a/llvm/test/CodeGen/WebAssembly/simd-arith.ll b/llvm/test/CodeGen/WebAssembly/simd-arith.ll
index 47879cfb1dfbc..dc33f36b97f2b 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-arith.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-arith.ll
@@ -659,6 +659,70 @@ define <8 x i16> @bitselect_v8i16(<8 x i16> %c, <8 x i16> %v1, <8 x i16> %v2) {
ret <8 x i16> %a
}
+; CHECK-LABEL: extmul_low_s_v8i16:
+; NO-SIMD128-NOT: i16x8
+; SIMD128-NEXT: .functype extmul_low_s_v8i16 (v128, v128) -> (v128){{$}}
+; SIMD128-SLOW-NEXT: i16x8.extmul_low_i8x16_s $push[[R:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
+define <8 x i16> @extmul_low_s_v8i16(<16 x i8> %v1, <16 x i8> %v2) {
+ %low1 = shufflevector <16 x i8> %v1, <16 x i8> undef,
+ <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %low2 = shufflevector <16 x i8> %v2, <16 x i8> undef,
+ <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %extended1 = sext <8 x i8> %low1 to <8 x i16>
+ %extended2 = sext <8 x i8> %low2 to <8 x i16>
+ %a = mul <8 x i16> %extended1, %extended2
+ ret <8 x i16> %a
+}
+
+; CHECK-LABEL: extmul_high_s_v8i16:
+; NO-SIMD128-NOT: i16x8
+; SIMD128-NEXT: .functype extmul_high_s_v8i16 (v128, v128) -> (v128){{$}}
+; SIMD128-SLOW-NEXT: i16x8.extmul_high_i8x16_s $push[[R:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
+define <8 x i16> @extmul_high_s_v8i16(<16 x i8> %v1, <16 x i8> %v2) {
+ %high1 = shufflevector <16 x i8> %v1, <16 x i8> undef,
+ <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %high2 = shufflevector <16 x i8> %v2, <16 x i8> undef,
+ <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %extended1 = sext <8 x i8> %high1 to <8 x i16>
+ %extended2 = sext <8 x i8> %high2 to <8 x i16>
+ %a = mul <8 x i16> %extended1, %extended2
+ ret <8 x i16> %a
+}
+
+; CHECK-LABEL: extmul_low_u_v8i16:
+; NO-SIMD128-NOT: i16x8
+; SIMD128-NEXT: .functype extmul_low_u_v8i16 (v128, v128) -> (v128){{$}}
+; SIMD128-SLOW-NEXT: i16x8.extmul_low_i8x16_u $push[[R:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
+define <8 x i16> @extmul_low_u_v8i16(<16 x i8> %v1, <16 x i8> %v2) {
+ %low1 = shufflevector <16 x i8> %v1, <16 x i8> undef,
+ <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %low2 = shufflevector <16 x i8> %v2, <16 x i8> undef,
+ <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %extended1 = zext <8 x i8> %low1 to <8 x i16>
+ %extended2 = zext <8 x i8> %low2 to <8 x i16>
+ %a = mul <8 x i16> %extended1, %extended2
+ ret <8 x i16> %a
+}
+
+; CHECK-LABEL: extmul_high_u_v8i16:
+; NO-SIMD128-NOT: i16x8
+; SIMD128-NEXT: .functype extmul_high_u_v8i16 (v128, v128) -> (v128){{$}}
+; SIMD128-SLOW-NEXT: i16x8.extmul_high_i8x16_u $push[[R:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
+define <8 x i16> @extmul_high_u_v8i16(<16 x i8> %v1, <16 x i8> %v2) {
+ %high1 = shufflevector <16 x i8> %v1, <16 x i8> undef,
+ <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %high2 = shufflevector <16 x i8> %v2, <16 x i8> undef,
+ <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %extended1 = zext <8 x i8> %high1 to <8 x i16>
+ %extended2 = zext <8 x i8> %high2 to <8 x i16>
+ %a = mul <8 x i16> %extended1, %extended2
+ ret <8 x i16> %a
+}
+
; ==============================================================================
; 4 x i32
; ==============================================================================
@@ -934,6 +998,70 @@ define <4 x i32> @bitselect_v4i32(<4 x i32> %c, <4 x i32> %v1, <4 x i32> %v2) {
ret <4 x i32> %a
}
+; CHECK-LABEL: extmul_low_s_v4i32:
+; NO-SIMD128-NOT: i32x4
+; SIMD128-NEXT: .functype extmul_low_s_v4i32 (v128, v128) -> (v128){{$}}
+; SIMD128-SLOW-NEXT: i32x4.extmul_low_i16x8_s $push[[R:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
+define <4 x i32> @extmul_low_s_v4i32(<8 x i16> %v1, <8 x i16> %v2) {
+ %low1 = shufflevector <8 x i16> %v1, <8 x i16> undef,
+ <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %low2 = shufflevector <8 x i16> %v2, <8 x i16> undef,
+ <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %extended1 = sext <4 x i16> %low1 to <4 x i32>
+ %extended2 = sext <4 x i16> %low2 to <4 x i32>
+ %a = mul <4 x i32> %extended1, %extended2
+ ret <4 x i32> %a
+}
+
+; CHECK-LABEL: extmul_high_s_v4i32:
+; NO-SIMD128-NOT: i32x4
+; SIMD128-NEXT: .functype extmul_high_s_v4i32 (v128, v128) -> (v128){{$}}
+; SIMD128-SLOW-NEXT: i32x4.extmul_high_i16x8_s $push[[R:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
+define <4 x i32> @extmul_high_s_v4i32(<8 x i16> %v1, <8 x i16> %v2) {
+ %high1 = shufflevector <8 x i16> %v1, <8 x i16> undef,
+ <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %high2 = shufflevector <8 x i16> %v2, <8 x i16> undef,
+ <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %extended1 = sext <4 x i16> %high1 to <4 x i32>
+ %extended2 = sext <4 x i16> %high2 to <4 x i32>
+ %a = mul <4 x i32> %extended1, %extended2
+ ret <4 x i32> %a
+}
+
+; CHECK-LABEL: extmul_low_u_v4i32:
+; NO-SIMD128-NOT: i32x4
+; SIMD128-NEXT: .functype extmul_low_u_v4i32 (v128, v128) -> (v128){{$}}
+; SIMD128-SLOW-NEXT: i32x4.extmul_low_i16x8_u $push[[R:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
+define <4 x i32> @extmul_low_u_v4i32(<8 x i16> %v1, <8 x i16> %v2) {
+ %low1 = shufflevector <8 x i16> %v1, <8 x i16> undef,
+ <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %low2 = shufflevector <8 x i16> %v2, <8 x i16> undef,
+ <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %extended1 = zext <4 x i16> %low1 to <4 x i32>
+ %extended2 = zext <4 x i16> %low2 to <4 x i32>
+ %a = mul <4 x i32> %extended1, %extended2
+ ret <4 x i32> %a
+}
+
+; CHECK-LABEL: extmul_high_u_v4i32:
+; NO-SIMD128-NOT: i32x4
+; SIMD128-NEXT: .functype extmul_high_u_v4i32 (v128, v128) -> (v128){{$}}
+; SIMD128-SLOW-NEXT: i32x4.extmul_high_i16x8_u $push[[R:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
+define <4 x i32> @extmul_high_u_v4i32(<8 x i16> %v1, <8 x i16> %v2) {
+ %high1 = shufflevector <8 x i16> %v1, <8 x i16> undef,
+ <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %high2 = shufflevector <8 x i16> %v2, <8 x i16> undef,
+ <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %extended1 = zext <4 x i16> %high1 to <4 x i32>
+ %extended2 = zext <4 x i16> %high2 to <4 x i32>
+ %a = mul <4 x i32> %extended1, %extended2
+ ret <4 x i32> %a
+}
+
; ==============================================================================
; 2 x i64
; ==============================================================================
@@ -1262,6 +1390,62 @@ define <2 x i64> @bitselect_v2i64(<2 x i64> %c, <2 x i64> %v1, <2 x i64> %v2) {
ret <2 x i64> %a
}
+; CHECK-LABEL: extmul_low_s_v2i64:
+; NO-SIMD128-NOT: i64x2
+; SIMD128-NEXT: .functype extmul_low_s_v2i64 (v128, v128) -> (v128){{$}}
+; SIMD128-SLOW-NEXT: i64x2.extmul_low_i32x4_s $push[[R:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
+define <2 x i64> @extmul_low_s_v2i64(<4 x i32> %v1, <4 x i32> %v2) {
+ %low1 = shufflevector <4 x i32> %v1, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+ %low2 = shufflevector <4 x i32> %v2, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+ %extended1 = sext <2 x i32> %low1 to <2 x i64>
+ %extended2 = sext <2 x i32> %low2 to <2 x i64>
+ %a = mul <2 x i64> %extended1, %extended2
+ ret <2 x i64> %a
+}
+
+; CHECK-LABEL: extmul_high_s_v2i64:
+; NO-SIMD128-NOT: i64x2
+; SIMD128-NEXT: .functype extmul_high_s_v2i64 (v128, v128) -> (v128){{$}}
+; SIMD128-SLOW-NEXT: i64x2.extmul_high_i32x4_s $push[[R:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
+define <2 x i64> @extmul_high_s_v2i64(<4 x i32> %v1, <4 x i32> %v2) {
+ %high1 = shufflevector <4 x i32> %v1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %high2 = shufflevector <4 x i32> %v2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %extended1 = sext <2 x i32> %high1 to <2 x i64>
+ %extended2 = sext <2 x i32> %high2 to <2 x i64>
+ %a = mul <2 x i64> %extended1, %extended2
+ ret <2 x i64> %a
+}
+
+; CHECK-LABEL: extmul_low_u_v2i64:
+; NO-SIMD128-NOT: i64x2
+; SIMD128-NEXT: .functype extmul_low_u_v2i64 (v128, v128) -> (v128){{$}}
+; SIMD128-SLOW-NEXT: i64x2.extmul_low_i32x4_u $push[[R:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
+define <2 x i64> @extmul_low_u_v2i64(<4 x i32> %v1, <4 x i32> %v2) {
+ %low1 = shufflevector <4 x i32> %v1, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+ %low2 = shufflevector <4 x i32> %v2, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+ %extended1 = zext <2 x i32> %low1 to <2 x i64>
+ %extended2 = zext <2 x i32> %low2 to <2 x i64>
+ %a = mul <2 x i64> %extended1, %extended2
+ ret <2 x i64> %a
+}
+
+; CHECK-LABEL: extmul_high_u_v2i64:
+; NO-SIMD128-NOT: i64x2
+; SIMD128-NEXT: .functype extmul_high_u_v2i64 (v128, v128) -> (v128){{$}}
+; SIMD128-SLOW-NEXT: i64x2.extmul_high_i32x4_u $push[[R:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
+define <2 x i64> @extmul_high_u_v2i64(<4 x i32> %v1, <4 x i32> %v2) {
+ %high1 = shufflevector <4 x i32> %v1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %high2 = shufflevector <4 x i32> %v2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %extended1 = zext <2 x i32> %high1 to <2 x i64>
+ %extended2 = zext <2 x i32> %high2 to <2 x i64>
+ %a = mul <2 x i64> %extended1, %extended2
+ ret <2 x i64> %a
+}
+
; ==============================================================================
; 4 x float
; ==============================================================================
diff --git a/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll b/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll
index aaa1c4d508618..f7cc9302003db 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll
@@ -248,54 +248,6 @@ define <8 x i16> @q15mulr_sat_s_v8i16(<8 x i16> %x, <8 x i16> %y) {
ret <8 x i16> %a
}
-; CHECK-LABEL: extmul_low_s_v8i16:
-; CHECK-NEXT: .functype extmul_low_s_v8i16 (v128, v128) -> (v128){{$}}
-; CHECK-NEXT: i16x8.extmul_low_i8x16_s $push[[R:[0-9]+]]=, $0, $1{{$}}
-; CHECK-NEXT: return $pop[[R]]{{$}}
-declare <8 x i16> @llvm.wasm.extmul.low.signed.v8i16(<16 x i8>, <16 x i8>)
-define <8 x i16> @extmul_low_s_v8i16(<16 x i8> %x, <16 x i8> %y) {
- %a = call <8 x i16> @llvm.wasm.extmul.low.signed.v8i16(
- <16 x i8> %x, <16 x i8> %y
- )
- ret <8 x i16> %a
-}
-
-; CHECK-LABEL: extmul_high_s_v8i16:
-; CHECK-NEXT: .functype extmul_high_s_v8i16 (v128, v128) -> (v128){{$}}
-; CHECK-NEXT: i16x8.extmul_high_i8x16_s $push[[R:[0-9]+]]=, $0, $1{{$}}
-; CHECK-NEXT: return $pop[[R]]{{$}}
-declare <8 x i16> @llvm.wasm.extmul.high.signed.v8i16(<16 x i8>, <16 x i8>)
-define <8 x i16> @extmul_high_s_v8i16(<16 x i8> %x, <16 x i8> %y) {
- %a = call <8 x i16> @llvm.wasm.extmul.high.signed.v8i16(
- <16 x i8> %x, <16 x i8> %y
- )
- ret <8 x i16> %a
-}
-
-; CHECK-LABEL: extmul_low_u_v8i16:
-; CHECK-NEXT: .functype extmul_low_u_v8i16 (v128, v128) -> (v128){{$}}
-; CHECK-NEXT: i16x8.extmul_low_i8x16_u $push[[R:[0-9]+]]=, $0, $1{{$}}
-; CHECK-NEXT: return $pop[[R]]{{$}}
-declare <8 x i16> @llvm.wasm.extmul.low.unsigned.v8i16(<16 x i8>, <16 x i8>)
-define <8 x i16> @extmul_low_u_v8i16(<16 x i8> %x, <16 x i8> %y) {
- %a = call <8 x i16> @llvm.wasm.extmul.low.unsigned.v8i16(
- <16 x i8> %x, <16 x i8> %y
- )
- ret <8 x i16> %a
-}
-
-; CHECK-LABEL: extmul_high_u_v8i16:
-; CHECK-NEXT: .functype extmul_high_u_v8i16 (v128, v128) -> (v128){{$}}
-; CHECK-NEXT: i16x8.extmul_high_i8x16_u $push[[R:[0-9]+]]=, $0, $1{{$}}
-; CHECK-NEXT: return $pop[[R]]{{$}}
-declare <8 x i16> @llvm.wasm.extmul.high.unsigned.v8i16(<16 x i8>, <16 x i8>)
-define <8 x i16> @extmul_high_u_v8i16(<16 x i8> %x, <16 x i8> %y) {
- %a = call <8 x i16> @llvm.wasm.extmul.high.unsigned.v8i16(
- <16 x i8> %x, <16 x i8> %y
- )
- ret <8 x i16> %a
-}
-
; CHECK-LABEL: extadd_pairwise_s_v8i16:
; CHECK-NEXT: .functype extadd_pairwise_s_v8i16 (v128) -> (v128){{$}}
; CHECK-NEXT: i16x8.extadd_pairwise_i8x16_s $push[[R:[0-9]+]]=, $0{{$}}
@@ -395,55 +347,6 @@ define <4 x i32> @dot(<8 x i16> %x, <8 x i16> %y) {
ret <4 x i32> %a
}
-
-; CHECK-LABEL: extmul_low_s_v4i32:
-; CHECK-NEXT: .functype extmul_low_s_v4i32 (v128, v128) -> (v128){{$}}
-; CHECK-NEXT: i32x4.extmul_low_i16x8_s $push[[R:[0-9]+]]=, $0, $1{{$}}
-; CHECK-NEXT: return $pop[[R]]{{$}}
-declare <4 x i32> @llvm.wasm.extmul.low.signed.v4i32(<8 x i16>, <8 x i16>)
-define <4 x i32> @extmul_low_s_v4i32(<8 x i16> %x, <8 x i16> %y) {
- %a = call <4 x i32> @llvm.wasm.extmul.low.signed.v4i32(
- <8 x i16> %x, <8 x i16> %y
- )
- ret <4 x i32> %a
-}
-
-; CHECK-LABEL: extmul_high_s_v4i32:
-; CHECK-NEXT: .functype extmul_high_s_v4i32 (v128, v128) -> (v128){{$}}
-; CHECK-NEXT: i32x4.extmul_high_i16x8_s $push[[R:[0-9]+]]=, $0, $1{{$}}
-; CHECK-NEXT: return $pop[[R]]{{$}}
-declare <4 x i32> @llvm.wasm.extmul.high.signed.v4i32(<8 x i16>, <8 x i16>)
-define <4 x i32> @extmul_high_s_v4i32(<8 x i16> %x, <8 x i16> %y) {
- %a = call <4 x i32> @llvm.wasm.extmul.high.signed.v4i32(
- <8 x i16> %x, <8 x i16> %y
- )
- ret <4 x i32> %a
-}
-
-; CHECK-LABEL: extmul_low_u_v4i32:
-; CHECK-NEXT: .functype extmul_low_u_v4i32 (v128, v128) -> (v128){{$}}
-; CHECK-NEXT: i32x4.extmul_low_i16x8_u $push[[R:[0-9]+]]=, $0, $1{{$}}
-; CHECK-NEXT: return $pop[[R]]{{$}}
-declare <4 x i32> @llvm.wasm.extmul.low.unsigned.v4i32(<8 x i16>, <8 x i16>)
-define <4 x i32> @extmul_low_u_v4i32(<8 x i16> %x, <8 x i16> %y) {
- %a = call <4 x i32> @llvm.wasm.extmul.low.unsigned.v4i32(
- <8 x i16> %x, <8 x i16> %y
- )
- ret <4 x i32> %a
-}
-
-; CHECK-LABEL: extmul_high_u_v4i32:
-; CHECK-NEXT: .functype extmul_high_u_v4i32 (v128, v128) -> (v128){{$}}
-; CHECK-NEXT: i32x4.extmul_high_i16x8_u $push[[R:[0-9]+]]=, $0, $1{{$}}
-; CHECK-NEXT: return $pop[[R]]{{$}}
-declare <4 x i32> @llvm.wasm.extmul.high.unsigned.v4i32(<8 x i16>, <8 x i16>)
-define <4 x i32> @extmul_high_u_v4i32(<8 x i16> %x, <8 x i16> %y) {
- %a = call <4 x i32> @llvm.wasm.extmul.high.unsigned.v4i32(
- <8 x i16> %x, <8 x i16> %y
- )
- ret <4 x i32> %a
-}
-
; CHECK-LABEL: extadd_pairwise_s_v4i32:
; CHECK-NEXT: .functype extadd_pairwise_s_v4i32 (v128) -> (v128){{$}}
; CHECK-NEXT: i32x4.extadd_pairwise_i16x8_s $push[[R:[0-9]+]]=, $0{{$}}
@@ -580,54 +483,6 @@ define <4 x i32> @trunc_sat_zero_u_v4i32_2(<2 x double> %x) {
; ==============================================================================
; 2 x i64
; ==============================================================================
-; CHECK-LABEL: extmul_low_s_v2i64:
-; CHECK-NEXT: .functype extmul_low_s_v2i64 (v128, v128) -> (v128){{$}}
-; CHECK-NEXT: i64x2.extmul_low_i32x4_s $push[[R:[0-9]+]]=, $0, $1{{$}}
-; CHECK-NEXT: return $pop[[R]]{{$}}
-declare <2 x i64> @llvm.wasm.extmul.low.signed.v2i64(<4 x i32>, <4 x i32>)
-define <2 x i64> @extmul_low_s_v2i64(<4 x i32> %x, <4 x i32> %y) {
- %a = call <2 x i64> @llvm.wasm.extmul.low.signed.v2i64(
- <4 x i32> %x, <4 x i32> %y
- )
- ret <2 x i64> %a
-}
-
-; CHECK-LABEL: extmul_high_s_v2i64:
-; CHECK-NEXT: .functype extmul_high_s_v2i64 (v128, v128) -> (v128){{$}}
-; CHECK-NEXT: i64x2.extmul_high_i32x4_s $push[[R:[0-9]+]]=, $0, $1{{$}}
-; CHECK-NEXT: return $pop[[R]]{{$}}
-declare <2 x i64> @llvm.wasm.extmul.high.signed.v2i64(<4 x i32>, <4 x i32>)
-define <2 x i64> @extmul_high_s_v2i64(<4 x i32> %x, <4 x i32> %y) {
- %a = call <2 x i64> @llvm.wasm.extmul.high.signed.v2i64(
- <4 x i32> %x, <4 x i32> %y
- )
- ret <2 x i64> %a
-}
-
-; CHECK-LABEL: extmul_low_u_v2i64:
-; CHECK-NEXT: .functype extmul_low_u_v2i64 (v128, v128) -> (v128){{$}}
-; CHECK-NEXT: i64x2.extmul_low_i32x4_u $push[[R:[0-9]+]]=, $0, $1{{$}}
-; CHECK-NEXT: return $pop[[R]]{{$}}
-declare <2 x i64> @llvm.wasm.extmul.low.unsigned.v2i64(<4 x i32>, <4 x i32>)
-define <2 x i64> @extmul_low_u_v2i64(<4 x i32> %x, <4 x i32> %y) {
- %a = call <2 x i64> @llvm.wasm.extmul.low.unsigned.v2i64(
- <4 x i32> %x, <4 x i32> %y
- )
- ret <2 x i64> %a
-}
-
-; CHECK-LABEL: extmul_high_u_v2i64:
-; CHECK-NEXT: .functype extmul_high_u_v2i64 (v128, v128) -> (v128){{$}}
-; CHECK-NEXT: i64x2.extmul_high_i32x4_u $push[[R:[0-9]+]]=, $0, $1{{$}}
-; CHECK-NEXT: return $pop[[R]]{{$}}
-declare <2 x i64> @llvm.wasm.extmul.high.unsigned.v2i64(<4 x i32>, <4 x i32>)
-define <2 x i64> @extmul_high_u_v2i64(<4 x i32> %x, <4 x i32> %y) {
- %a = call <2 x i64> @llvm.wasm.extmul.high.unsigned.v2i64(
- <4 x i32> %x, <4 x i32> %y
- )
- ret <2 x i64> %a
-}
-
; CHECK-LABEL: any_v2i64:
; CHECK-NEXT: .functype any_v2i64 (v128) -> (i32){{$}}
; CHECK-NEXT: v128.any_true $push[[R:[0-9]+]]=, $0{{$}}
More information about the llvm-commits
mailing list