[clang] e522010 - [WebAssembly] Custom combines for f64x2.promote_low_f32x4
Thomas Lively via cfe-commits <cfe-commits at lists.llvm.org>
Fri Jul 9 18:59:39 PDT 2021
Author: Thomas Lively
Date: 2021-07-09T18:59:29-07:00
New Revision: e5220104d0708e02f2faaacc091189fb3e47e30c
URL: https://github.com/llvm/llvm-project/commit/e5220104d0708e02f2faaacc091189fb3e47e30c
DIFF: https://github.com/llvm/llvm-project/commit/e5220104d0708e02f2faaacc091189fb3e47e30c.diff
LOG: [WebAssembly] Custom combines for f64x2.promote_low_f32x4
Replace the clang builtin function and LLVM intrinsic previously used to select
the f64x2.promote_low_f32x4 instruction with custom combines from standard
SelectionDAG nodes. Implement the new combines to share code with the similar
combines for f64x2.convert_low_i32x4_{s,u}. Resolves PR50232.
Differential Revision: https://reviews.llvm.org/D105675
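For illustration, the IR shape that now selects f64x2.promote_low_f32x4 (as exercised by the new test in simd-conversions.ll below) is a plain fpext of the low two lanes extracted by a shufflevector, rather than a call to the removed @llvm.wasm.promote.low intrinsic:

define <2 x double> @promote_low_v2f64(<4 x float> %x) {
  ; extract the low two lanes, then widen them; the new DAG combine selects
  ; this pair of nodes as a single f64x2.promote_low_f32x4 instruction
  %v = shufflevector <4 x float> %x, <4 x float> undef, <2 x i32> <i32 0, i32 1>
  %a = fpext <2 x float> %v to <2 x double>
  ret <2 x double> %a
}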
Added:
Modified:
clang/include/clang/Basic/BuiltinsWebAssembly.def
clang/lib/CodeGen/CGBuiltin.cpp
clang/lib/Headers/wasm_simd128.h
clang/test/CodeGen/builtins-wasm.c
clang/test/Headers/wasm.c
llvm/include/llvm/IR/IntrinsicsWebAssembly.td
llvm/lib/Target/WebAssembly/WebAssemblyISD.def
llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
llvm/test/CodeGen/WebAssembly/simd-conversions.ll
llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll
Removed:
################################################################################
diff --git a/clang/include/clang/Basic/BuiltinsWebAssembly.def b/clang/include/clang/Basic/BuiltinsWebAssembly.def
index c11490771973..cdaaa5d81f6d 100644
--- a/clang/include/clang/Basic/BuiltinsWebAssembly.def
+++ b/clang/include/clang/Basic/BuiltinsWebAssembly.def
@@ -192,7 +192,6 @@ TARGET_BUILTIN(__builtin_wasm_narrow_u_i16x8_i32x4, "V8UsV4iV4i", "nc", "simd128
TARGET_BUILTIN(__builtin_wasm_trunc_sat_zero_s_f64x2_i32x4, "V4iV2d", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_trunc_sat_zero_u_f64x2_i32x4, "V4UiV2d", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_demote_zero_f64x2_f32x4, "V4fV2d", "nc", "simd128")
-TARGET_BUILTIN(__builtin_wasm_promote_low_f32x4_f64x2, "V2dV4f", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_load32_zero, "V4iiC*", "n", "simd128")
TARGET_BUILTIN(__builtin_wasm_load64_zero, "V2LLiLLiC*", "n", "simd128")
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 2c24b71d030a..41ea2bf5f43a 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -17728,11 +17728,6 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_demote_zero);
return Builder.CreateCall(Callee, Vec);
}
- case WebAssembly::BI__builtin_wasm_promote_low_f32x4_f64x2: {
- Value *Vec = EmitScalarExpr(E->getArg(0));
- Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_promote_low);
- return Builder.CreateCall(Callee, Vec);
- }
case WebAssembly::BI__builtin_wasm_load32_zero: {
Value *Ptr = EmitScalarExpr(E->getArg(0));
Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_load32_zero);
diff --git a/clang/lib/Headers/wasm_simd128.h b/clang/lib/Headers/wasm_simd128.h
index fe087d2442a1..4869f7de6c7f 100644
--- a/clang/lib/Headers/wasm_simd128.h
+++ b/clang/lib/Headers/wasm_simd128.h
@@ -43,6 +43,7 @@ typedef unsigned short __u16x4
typedef int __i32x2 __attribute__((__vector_size__(8), __aligned__(8)));
typedef unsigned int __u32x2
__attribute__((__vector_size__(8), __aligned__(8)));
+typedef float __f32x2 __attribute__((__vector_size__(8), __aligned__(8)));
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, __target__("simd128"), \
@@ -1155,7 +1156,8 @@ wasm_f32x4_demote_f64x2_zero(v128_t __a) {
static __inline__ v128_t __DEFAULT_FN_ATTRS
wasm_f64x2_promote_low_f32x4(v128_t __a) {
- return (v128_t)__builtin_wasm_promote_low_f32x4_f64x2((__f32x4)__a);
+ return (v128_t) __builtin_convertvector(
+ (__f32x2){((__f32x4)__a)[0], ((__f32x4)__a)[1]}, __f64x2);
}
#define wasm_i8x16_shuffle(__a, __b, __c0, __c1, __c2, __c3, __c4, __c5, __c6, \
diff --git a/clang/test/CodeGen/builtins-wasm.c b/clang/test/CodeGen/builtins-wasm.c
index 151bddc4ee03..61130922241c 100644
--- a/clang/test/CodeGen/builtins-wasm.c
+++ b/clang/test/CodeGen/builtins-wasm.c
@@ -898,12 +898,6 @@ f32x4 wasm_demote_zero_f64x2_f32x4(f64x2 x) {
// WEBASSEMBLY: ret
}
-f64x2 wasm_promote_low_f32x4_f64x2(f32x4 x) {
- return __builtin_wasm_promote_low_f32x4_f64x2(x);
- // WEBASSEMBLY: call <2 x double> @llvm.wasm.promote.low(<4 x float> %x)
- // WEBASSEMBLY: ret
-}
-
i32x4 load32_zero(const int *p) {
return __builtin_wasm_load32_zero(p);
// WEBASSEMBLY: call <4 x i32> @llvm.wasm.load32.zero(i32* %p)
diff --git a/clang/test/Headers/wasm.c b/clang/test/Headers/wasm.c
index 86c7f8a2a8ea..1cf87b4bb786 100644
--- a/clang/test/Headers/wasm.c
+++ b/clang/test/Headers/wasm.c
@@ -1281,7 +1281,7 @@ v128_t test_v128_andnot(v128_t a, v128_t b) {
// CHECK-LABEL: @test_v128_any_true(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.wasm.anytrue.v16i8(<16 x i8> [[TMP0]]) #[[ATTR11:[0-9]+]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.wasm.anytrue.v16i8(<16 x i8> [[TMP0]]) #[[ATTR10:[0-9]+]]
// CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP1]], 0
// CHECK-NEXT: ret i1 [[TOBOOL_I]]
//
@@ -1291,7 +1291,7 @@ bool test_v128_any_true(v128_t a) {
// CHECK-LABEL: @test_v128_bitselect(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.wasm.bitselect.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> [[MASK:%.*]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.wasm.bitselect.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> [[MASK:%.*]]) #[[ATTR10]]
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
v128_t test_v128_bitselect(v128_t a, v128_t b, v128_t mask) {
@@ -1301,7 +1301,7 @@ v128_t test_v128_bitselect(v128_t a, v128_t b, v128_t mask) {
// CHECK-LABEL: @test_i8x16_abs(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.abs.v16i8(<16 x i8> [[TMP0]], i1 false) #[[ATTR11]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.abs.v16i8(<16 x i8> [[TMP0]], i1 false) #[[ATTR10]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
@@ -1323,7 +1323,7 @@ v128_t test_i8x16_neg(v128_t a) {
// CHECK-LABEL: @test_i8x16_all_true(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.wasm.alltrue.v16i8(<16 x i8> [[TMP0]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.wasm.alltrue.v16i8(<16 x i8> [[TMP0]]) #[[ATTR10]]
// CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP1]], 0
// CHECK-NEXT: ret i1 [[TOBOOL_I]]
//
@@ -1334,7 +1334,7 @@ bool test_i8x16_all_true(v128_t a) {
// CHECK-LABEL: @test_i8x16_bitmask(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.wasm.bitmask.v16i8(<16 x i8> [[TMP0]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.wasm.bitmask.v16i8(<16 x i8> [[TMP0]]) #[[ATTR10]]
// CHECK-NEXT: ret i32 [[TMP1]]
//
int32_t test_i8x16_bitmask(v128_t a) {
@@ -1344,7 +1344,7 @@ int32_t test_i8x16_bitmask(v128_t a) {
// CHECK-LABEL: @test_i8x16_popcnt(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.wasm.popcnt(<16 x i8> [[TMP0]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.wasm.popcnt(<16 x i8> [[TMP0]]) #[[ATTR10]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
@@ -1410,7 +1410,7 @@ v128_t test_i8x16_add(v128_t a, v128_t b) {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR10]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
@@ -1422,7 +1422,7 @@ v128_t test_i8x16_add_sat(v128_t a, v128_t b) {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR10]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
@@ -1446,7 +1446,7 @@ v128_t test_i8x16_sub(v128_t a, v128_t b) {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.sub.sat.signed.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.sub.sat.signed.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR10]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
@@ -1458,7 +1458,7 @@ v128_t test_i8x16_sub_sat(v128_t a, v128_t b) {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.sub.sat.unsigned.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.sub.sat.unsigned.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR10]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
@@ -1522,7 +1522,7 @@ v128_t test_u8x16_max(v128_t a, v128_t b) {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.avgr.unsigned.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.avgr.unsigned.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR10]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
@@ -1533,7 +1533,7 @@ v128_t test_u8x16_avgr(v128_t a, v128_t b) {
// CHECK-LABEL: @test_i16x8_abs(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.abs.v8i16(<8 x i16> [[TMP0]], i1 false) #[[ATTR11]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.abs.v8i16(<8 x i16> [[TMP0]], i1 false) #[[ATTR10]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
@@ -1555,7 +1555,7 @@ v128_t test_i16x8_neg(v128_t a) {
// CHECK-LABEL: @test_i16x8_all_true(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.wasm.alltrue.v8i16(<8 x i16> [[TMP0]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.wasm.alltrue.v8i16(<8 x i16> [[TMP0]]) #[[ATTR10]]
// CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP1]], 0
// CHECK-NEXT: ret i1 [[TOBOOL_I]]
//
@@ -1566,7 +1566,7 @@ bool test_i16x8_all_true(v128_t a) {
// CHECK-LABEL: @test_i16x8_bitmask(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.wasm.bitmask.v8i16(<8 x i16> [[TMP0]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.wasm.bitmask.v8i16(<8 x i16> [[TMP0]]) #[[ATTR10]]
// CHECK-NEXT: ret i32 [[TMP1]]
//
int32_t test_i16x8_bitmask(v128_t a) {
@@ -1631,7 +1631,7 @@ v128_t test_i16x8_add(v128_t a, v128_t b) {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR10]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
@@ -1643,7 +1643,7 @@ v128_t test_i16x8_add_sat(v128_t a, v128_t b) {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR10]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
@@ -1667,7 +1667,7 @@ v128_t test_i16x8_sub(v128_t a, v128_t b) {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.sub.sat.signed.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.sub.sat.signed.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR10]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
@@ -1679,7 +1679,7 @@ v128_t test_i16x8_sub_sat(v128_t a, v128_t b) {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.sub.sat.unsigned.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.sub.sat.unsigned.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR10]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
@@ -1755,7 +1755,7 @@ v128_t test_u16x8_max(v128_t a, v128_t b) {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.avgr.unsigned.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.avgr.unsigned.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR10]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
@@ -1765,7 +1765,7 @@ v128_t test_u16x8_avgr(v128_t a, v128_t b) {
// CHECK-LABEL: @test_i32x4_abs(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.abs.v4i32(<4 x i32> [[A:%.*]], i1 false) #[[ATTR11]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.abs.v4i32(<4 x i32> [[A:%.*]], i1 false) #[[ATTR10]]
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
v128_t test_i32x4_abs(v128_t a) {
@@ -1783,7 +1783,7 @@ v128_t test_i32x4_neg(v128_t a) {
// CHECK-LABEL: @test_i32x4_all_true(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.wasm.alltrue.v4i32(<4 x i32> [[A:%.*]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.wasm.alltrue.v4i32(<4 x i32> [[A:%.*]]) #[[ATTR10]]
// CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP0]], 0
// CHECK-NEXT: ret i1 [[TOBOOL_I]]
//
@@ -1793,7 +1793,7 @@ bool test_i32x4_all_true(v128_t a) {
// CHECK-LABEL: @test_i32x4_bitmask(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.wasm.bitmask.v4i32(<4 x i32> [[A:%.*]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.wasm.bitmask.v4i32(<4 x i32> [[A:%.*]]) #[[ATTR10]]
// CHECK-NEXT: ret i32 [[TMP0]]
//
int32_t test_i32x4_bitmask(v128_t a) {
@@ -1904,7 +1904,7 @@ v128_t test_u32x4_max(v128_t a, v128_t b) {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.wasm.dot(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.wasm.dot(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR10]]
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t test_i32x4_dot_i16x8(v128_t a, v128_t b) {
@@ -1914,7 +1914,7 @@ v128_t test_i32x4_dot_i16x8(v128_t a, v128_t b) {
// CHECK-LABEL: @test_i64x2_abs(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.abs.v2i64(<2 x i64> [[TMP0]], i1 false) #[[ATTR11]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.abs.v2i64(<2 x i64> [[TMP0]], i1 false) #[[ATTR10]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
@@ -1936,7 +1936,7 @@ v128_t test_i64x2_neg(v128_t a) {
// CHECK-LABEL: @test_i64x2_all_true(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.wasm.alltrue.v2i64(<2 x i64> [[TMP0]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.wasm.alltrue.v2i64(<2 x i64> [[TMP0]]) #[[ATTR10]]
// CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP1]], 0
// CHECK-NEXT: ret i1 [[TOBOOL_I]]
//
@@ -1947,7 +1947,7 @@ bool test_i64x2_all_true(v128_t a) {
// CHECK-LABEL: @test_i64x2_bitmask(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.wasm.bitmask.v2i64(<2 x i64> [[TMP0]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.wasm.bitmask.v2i64(<2 x i64> [[TMP0]]) #[[ATTR10]]
// CHECK-NEXT: ret i32 [[TMP1]]
//
int32_t test_i64x2_bitmask(v128_t a) {
@@ -2035,7 +2035,7 @@ v128_t test_i64x2_mul(v128_t a, v128_t b) {
// CHECK-LABEL: @test_f32x4_abs(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]]) #[[ATTR10]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
@@ -2057,7 +2057,7 @@ v128_t test_f32x4_neg(v128_t a) {
// CHECK-LABEL: @test_f32x4_sqrt(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]]) #[[ATTR10]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
@@ -2068,7 +2068,7 @@ v128_t test_f32x4_sqrt(v128_t a) {
// CHECK-LABEL: @test_f32x4_ceil(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.ceil.v4f32(<4 x float> [[TMP0]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.ceil.v4f32(<4 x float> [[TMP0]]) #[[ATTR10]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
@@ -2079,7 +2079,7 @@ v128_t test_f32x4_ceil(v128_t a) {
// CHECK-LABEL: @test_f32x4_floor(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.floor.v4f32(<4 x float> [[TMP0]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.floor.v4f32(<4 x float> [[TMP0]]) #[[ATTR10]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
@@ -2090,7 +2090,7 @@ v128_t test_f32x4_floor(v128_t a) {
// CHECK-LABEL: @test_f32x4_trunc(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.trunc.v4f32(<4 x float> [[TMP0]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.trunc.v4f32(<4 x float> [[TMP0]]) #[[ATTR10]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
@@ -2101,7 +2101,7 @@ v128_t test_f32x4_trunc(v128_t a) {
// CHECK-LABEL: @test_f32x4_nearest(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.nearbyint.v4f32(<4 x float> [[TMP0]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.nearbyint.v4f32(<4 x float> [[TMP0]]) #[[ATTR10]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
@@ -2161,7 +2161,7 @@ v128_t test_f32x4_div(v128_t a, v128_t b) {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.minimum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.minimum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) #[[ATTR10]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
@@ -2173,7 +2173,7 @@ v128_t test_f32x4_min(v128_t a, v128_t b) {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.maximum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.maximum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) #[[ATTR10]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
@@ -2185,7 +2185,7 @@ v128_t test_f32x4_max(v128_t a, v128_t b) {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.wasm.pmin.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.wasm.pmin.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) #[[ATTR10]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
@@ -2197,7 +2197,7 @@ v128_t test_f32x4_pmin(v128_t a, v128_t b) {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.wasm.pmax.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.wasm.pmax.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) #[[ATTR10]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
@@ -2208,7 +2208,7 @@ v128_t test_f32x4_pmax(v128_t a, v128_t b) {
// CHECK-LABEL: @test_f64x2_abs(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.fabs.v2f64(<2 x double> [[TMP0]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.fabs.v2f64(<2 x double> [[TMP0]]) #[[ATTR10]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
@@ -2230,7 +2230,7 @@ v128_t test_f64x2_neg(v128_t a) {
// CHECK-LABEL: @test_f64x2_sqrt(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP0]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP0]]) #[[ATTR10]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
@@ -2241,7 +2241,7 @@ v128_t test_f64x2_sqrt(v128_t a) {
// CHECK-LABEL: @test_f64x2_ceil(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.ceil.v2f64(<2 x double> [[TMP0]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.ceil.v2f64(<2 x double> [[TMP0]]) #[[ATTR10]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
@@ -2252,7 +2252,7 @@ v128_t test_f64x2_ceil(v128_t a) {
// CHECK-LABEL: @test_f64x2_floor(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.floor.v2f64(<2 x double> [[TMP0]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.floor.v2f64(<2 x double> [[TMP0]]) #[[ATTR10]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
@@ -2263,7 +2263,7 @@ v128_t test_f64x2_floor(v128_t a) {
// CHECK-LABEL: @test_f64x2_trunc(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.trunc.v2f64(<2 x double> [[TMP0]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.trunc.v2f64(<2 x double> [[TMP0]]) #[[ATTR10]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
@@ -2274,7 +2274,7 @@ v128_t test_f64x2_trunc(v128_t a) {
// CHECK-LABEL: @test_f64x2_nearest(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.nearbyint.v2f64(<2 x double> [[TMP0]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.nearbyint.v2f64(<2 x double> [[TMP0]]) #[[ATTR10]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
@@ -2334,7 +2334,7 @@ v128_t test_f64x2_div(v128_t a, v128_t b) {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.minimum.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.minimum.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) #[[ATTR10]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
@@ -2346,7 +2346,7 @@ v128_t test_f64x2_min(v128_t a, v128_t b) {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.maximum.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.maximum.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) #[[ATTR10]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
@@ -2358,7 +2358,7 @@ v128_t test_f64x2_max(v128_t a, v128_t b) {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.wasm.pmin.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.wasm.pmin.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) #[[ATTR10]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
@@ -2370,7 +2370,7 @@ v128_t test_f64x2_pmin(v128_t a, v128_t b) {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.wasm.pmax.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.wasm.pmax.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) #[[ATTR10]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
@@ -2381,7 +2381,7 @@ v128_t test_f64x2_pmax(v128_t a, v128_t b) {
// CHECK-LABEL: @test_i32x4_trunc_sat_f32x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> [[TMP0]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> [[TMP0]]) #[[ATTR10]]
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
v128_t test_i32x4_trunc_sat_f32x4(v128_t a) {
@@ -2391,7 +2391,7 @@ v128_t test_i32x4_trunc_sat_f32x4(v128_t a) {
// CHECK-LABEL: @test_u32x4_trunc_sat_f32x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> [[TMP0]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> [[TMP0]]) #[[ATTR10]]
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
v128_t test_u32x4_trunc_sat_f32x4(v128_t a) {
@@ -2443,7 +2443,7 @@ v128_t test_f64x2_convert_low_u32x4(v128_t a) {
// CHECK-LABEL: @test_i32x4_trunc_sat_f64x2_zero(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double> [[TMP0]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double> [[TMP0]]) #[[ATTR10]]
// CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
@@ -2454,7 +2454,7 @@ v128_t test_i32x4_trunc_sat_f64x2_zero(v128_t a) {
// CHECK-LABEL: @test_u32x4_trunc_sat_f64x2_zero(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i32> @llvm.fptoui.sat.v2i32.v2f64(<2 x double> [[TMP0]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i32> @llvm.fptoui.sat.v2i32.v2f64(<2 x double> [[TMP0]]) #[[ATTR10]]
// CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
@@ -2465,7 +2465,7 @@ v128_t test_u32x4_trunc_sat_f64x2_zero(v128_t a) {
// CHECK-LABEL: @test_f32x4_demote_f64x2_zero(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.wasm.demote.zero(<2 x double> [[TMP0]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.wasm.demote.zero(<2 x double> [[TMP0]]) #[[ATTR10]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
@@ -2476,9 +2476,10 @@ v128_t test_f32x4_demote_f64x2_zero(v128_t a) {
// CHECK-LABEL: @test_f64x2_promote_low_f32x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.wasm.promote.low(<4 x float> [[TMP0]]) #[[ATTR11]]
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to <4 x i32>
-// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> undef, <2 x i32> <i32 0, i32 1>
+// CHECK-NEXT: [[CONV_I:%.*]] = fpext <2 x float> [[VECINIT2_I]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[CONV_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
v128_t test_f64x2_promote_low_f32x4(v128_t a) {
return wasm_f64x2_promote_low_f32x4(a);
@@ -2536,7 +2537,7 @@ v128_t test_i64x2_shuffle(v128_t a, v128_t b) {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.swizzle(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.swizzle(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR10]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
@@ -2548,7 +2549,7 @@ v128_t test_i8x16_swizzle(v128_t a, v128_t b) {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.narrow.signed.v16i8.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.narrow.signed.v16i8.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR10]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
@@ -2560,7 +2561,7 @@ v128_t test_i8x16_narrow_i16x8(v128_t a, v128_t b) {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.narrow.unsigned.v16i8.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.narrow.unsigned.v16i8.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR10]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
@@ -2570,7 +2571,7 @@ v128_t test_u8x16_narrow_i16x8(v128_t a, v128_t b) {
// CHECK-LABEL: @test_i16x8_narrow_i32x4(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.wasm.narrow.signed.v8i16.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.wasm.narrow.signed.v8i16.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR10]]
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
@@ -2580,7 +2581,7 @@ v128_t test_i16x8_narrow_i32x4(v128_t a, v128_t b) {
// CHECK-LABEL: @test_u16x8_narrow_i32x4(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.wasm.narrow.unsigned.v8i16.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.wasm.narrow.unsigned.v8i16.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR10]]
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
@@ -2727,7 +2728,7 @@ v128_t test_u64x2_extend_high_u32x4(v128_t a) {
// CHECK-LABEL: @test_i16x8_extadd_pairwise_i8x16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.wasm.extadd.pairwise.signed.v8i16(<16 x i8> [[TMP0]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.wasm.extadd.pairwise.signed.v8i16(<16 x i8> [[TMP0]]) #[[ATTR10]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
@@ -2738,7 +2739,7 @@ v128_t test_i16x8_extadd_pairwise_i8x16(v128_t a) {
// CHECK-LABEL: @test_u16x8_extadd_pairwise_u8x16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.wasm.extadd.pairwise.unsigned.v8i16(<16 x i8> [[TMP0]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.wasm.extadd.pairwise.unsigned.v8i16(<16 x i8> [[TMP0]]) #[[ATTR10]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
@@ -2749,7 +2750,7 @@ v128_t test_u16x8_extadd_pairwise_u8x16(v128_t a) {
// CHECK-LABEL: @test_i32x4_extadd_pairwise_i16x8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.wasm.extadd.pairwise.signed.v4i32(<8 x i16> [[TMP0]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.wasm.extadd.pairwise.signed.v4i32(<8 x i16> [[TMP0]]) #[[ATTR10]]
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
v128_t test_i32x4_extadd_pairwise_i16x8(v128_t a) {
@@ -2759,7 +2760,7 @@ v128_t test_i32x4_extadd_pairwise_i16x8(v128_t a) {
// CHECK-LABEL: @test_u32x4_extadd_pairwise_u16x8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.wasm.extadd.pairwise.unsigned.v4i32(<8 x i16> [[TMP0]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.wasm.extadd.pairwise.unsigned.v4i32(<8 x i16> [[TMP0]]) #[[ATTR10]]
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
v128_t test_u32x4_extadd_pairwise_u16x8(v128_t a) {
@@ -2770,7 +2771,7 @@ v128_t test_u32x4_extadd_pairwise_u16x8(v128_t a) {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.extmul.low.signed.v8i16(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.extmul.low.signed.v8i16(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR10]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
@@ -2782,7 +2783,7 @@ v128_t test_i16x8_extmul_low_i8x16(v128_t a, v128_t b) {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.extmul.high.signed.v8i16(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.extmul.high.signed.v8i16(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR10]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
@@ -2794,7 +2795,7 @@ v128_t test_i16x8_extmul_high_i8x16(v128_t a, v128_t b) {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.extmul.low.unsigned.v8i16(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.extmul.low.unsigned.v8i16(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR10]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
@@ -2806,7 +2807,7 @@ v128_t test_u16x8_extmul_low_u8x16(v128_t a, v128_t b) {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.extmul.high.unsigned.v8i16(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.extmul.high.unsigned.v8i16(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR10]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
@@ -2818,7 +2819,7 @@ v128_t test_u16x8_extmul_high_u8x16(v128_t a, v128_t b) {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.wasm.extmul.low.signed.v4i32(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.wasm.extmul.low.signed.v4i32(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR10]]
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t test_i32x4_extmul_low_i16x8(v128_t a, v128_t b) {
@@ -2829,7 +2830,7 @@ v128_t test_i32x4_extmul_low_i16x8(v128_t a, v128_t b) {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.wasm.extmul.high.signed.v4i32(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.wasm.extmul.high.signed.v4i32(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR10]]
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t test_i32x4_extmul_high_i16x8(v128_t a, v128_t b) {
@@ -2840,7 +2841,7 @@ v128_t test_i32x4_extmul_high_i16x8(v128_t a, v128_t b) {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.wasm.extmul.low.unsigned.v4i32(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.wasm.extmul.low.unsigned.v4i32(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR10]]
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t test_u32x4_extmul_low_u16x8(v128_t a, v128_t b) {
@@ -2851,7 +2852,7 @@ v128_t test_u32x4_extmul_low_u16x8(v128_t a, v128_t b) {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.wasm.extmul.high.unsigned.v4i32(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.wasm.extmul.high.unsigned.v4i32(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR10]]
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t test_u32x4_extmul_high_u16x8(v128_t a, v128_t b) {
@@ -2860,7 +2861,7 @@ v128_t test_u32x4_extmul_high_u16x8(v128_t a, v128_t b) {
// CHECK-LABEL: @test_i64x2_extmul_low_i32x4(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.wasm.extmul.low.signed.v2i64(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.wasm.extmul.low.signed.v2i64(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR10]]
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
@@ -2870,7 +2871,7 @@ v128_t test_i64x2_extmul_low_i32x4(v128_t a, v128_t b) {
// CHECK-LABEL: @test_i64x2_extmul_high_i32x4(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.wasm.extmul.high.signed.v2i64(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.wasm.extmul.high.signed.v2i64(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR10]]
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
@@ -2880,7 +2881,7 @@ v128_t test_i64x2_extmul_high_i32x4(v128_t a, v128_t b) {
// CHECK-LABEL: @test_u64x2_extmul_low_u32x4(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.wasm.extmul.low.unsigned.v2i64(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.wasm.extmul.low.unsigned.v2i64(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR10]]
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
@@ -2890,7 +2891,7 @@ v128_t test_u64x2_extmul_low_u32x4(v128_t a, v128_t b) {
// CHECK-LABEL: @test_u64x2_extmul_high_u32x4(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.wasm.extmul.high.unsigned.v2i64(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.wasm.extmul.high.unsigned.v2i64(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR10]]
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
@@ -2902,7 +2903,7 @@ v128_t test_u64x2_extmul_high_u32x4(v128_t a, v128_t b) {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.q15mulr.sat.signed(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.q15mulr.sat.signed(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR10]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
diff --git a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
index abeb4c0a19f3..0bd10f1b369b 100644
--- a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
+++ b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
@@ -263,13 +263,10 @@ def int_wasm_extadd_pairwise_unsigned :
[LLVMSubdivide2VectorType<0>],
[IntrNoMem, IntrSpeculatable]>;
-// TODO: Remove these if possible if they are merged to the spec.
+// TODO: Remove this if possible.
def int_wasm_demote_zero :
Intrinsic<[llvm_v4f32_ty], [llvm_v2f64_ty],
[IntrNoMem, IntrSpeculatable]>;
-def int_wasm_promote_low :
- Intrinsic<[llvm_v2f64_ty], [llvm_v4f32_ty],
- [IntrNoMem, IntrSpeculatable]>;
//===----------------------------------------------------------------------===//
// Thread-local storage intrinsics
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISD.def b/llvm/lib/Target/WebAssembly/WebAssemblyISD.def
index 9e229450222f..5bc478390634 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISD.def
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISD.def
@@ -37,6 +37,7 @@ HANDLE_NODETYPE(EXTEND_HIGH_S)
HANDLE_NODETYPE(EXTEND_HIGH_U)
HANDLE_NODETYPE(CONVERT_LOW_S)
HANDLE_NODETYPE(CONVERT_LOW_U)
+HANDLE_NODETYPE(PROMOTE_LOW)
HANDLE_NODETYPE(TRUNC_SAT_ZERO_S)
HANDLE_NODETYPE(TRUNC_SAT_ZERO_U)
HANDLE_NODETYPE(THROW)
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index bd676d636af6..fbcbd46b0d31 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -149,9 +149,11 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
setTargetDAGCombine(ISD::SIGN_EXTEND);
setTargetDAGCombine(ISD::ZERO_EXTEND);
- // Combine int_to_fp of extract_vectors and vice versa into conversions ops
+ // Combine int_to_fp or fp_extend of extract_vectors and vice versa into
+ // conversions ops
setTargetDAGCombine(ISD::SINT_TO_FP);
setTargetDAGCombine(ISD::UINT_TO_FP);
+ setTargetDAGCombine(ISD::FP_EXTEND);
setTargetDAGCombine(ISD::EXTRACT_SUBVECTOR);
// Combine concat of {s,u}int_to_fp_sat to i32x4.trunc_sat_f64x2_zero_{s,u}
@@ -2186,60 +2188,109 @@ performVectorConvertLowCombine(SDNode *N,
if (ResVT != MVT::v2f64)
return SDValue();
- if (N->getOpcode() == ISD::SINT_TO_FP || N->getOpcode() == ISD::UINT_TO_FP) {
- // Combine this:
- //
- // (v2f64 ({s,u}int_to_fp
- // (v2i32 (extract_subvector (v4i32 $x), 0))))
- //
- // into (f64x2.convert_low_i32x4_{s,u} $x).
- auto Extract = N->getOperand(0);
- if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR)
- return SDValue();
- if (Extract.getValueType() != MVT::v2i32)
- return SDValue();
- auto Source = Extract.getOperand(0);
- if (Source.getValueType() != MVT::v4i32)
- return SDValue();
- auto *IndexNode = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
- if (IndexNode == nullptr || IndexNode->getZExtValue() != 0)
- return SDValue();
-
- unsigned Op = N->getOpcode() == ISD::SINT_TO_FP
- ? WebAssemblyISD::CONVERT_LOW_S
- : WebAssemblyISD::CONVERT_LOW_U;
-
- return DAG.getNode(Op, SDLoc(N), ResVT, Source);
+ auto GetWasmConversionOp = [](unsigned Op) {
+ switch (Op) {
+ case ISD::SINT_TO_FP:
+ return WebAssemblyISD::CONVERT_LOW_S;
+ case ISD::UINT_TO_FP:
+ return WebAssemblyISD::CONVERT_LOW_U;
+ case ISD::FP_EXTEND:
+ return WebAssemblyISD::PROMOTE_LOW;
+ }
+ llvm_unreachable("unexpected op");
+ };
- } else if (N->getOpcode() == ISD::EXTRACT_SUBVECTOR) {
+ if (N->getOpcode() == ISD::EXTRACT_SUBVECTOR) {
// Combine this:
//
// (v2f64 (extract_subvector
// (v4f64 ({s,u}int_to_fp (v4i32 $x))), 0))
//
// into (f64x2.convert_low_i32x4_{s,u} $x).
- auto IntToFP = N->getOperand(0);
- if (IntToFP.getOpcode() != ISD::SINT_TO_FP &&
- IntToFP.getOpcode() != ISD::UINT_TO_FP)
+ //
+ // Or this:
+ //
+ // (v2f64 (extract_subvector
+ // (v4f64 (fp_extend (v4f32 $x))), 0))
+ //
+ // into (f64x2.promote_low_f32x4 $x).
+ auto Conversion = N->getOperand(0);
+ auto ConversionOp = Conversion.getOpcode();
+ MVT ExpectedSourceType;
+ switch (ConversionOp) {
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ ExpectedSourceType = MVT::v4i32;
+ break;
+ case ISD::FP_EXTEND:
+ ExpectedSourceType = MVT::v4f32;
+ break;
+ default:
return SDValue();
- if (IntToFP.getValueType() != MVT::v4f64)
+ }
+
+ if (Conversion.getValueType() != MVT::v4f64)
return SDValue();
- auto Source = IntToFP.getOperand(0);
- if (Source.getValueType() != MVT::v4i32)
+
+ auto Source = Conversion.getOperand(0);
+ if (Source.getValueType() != ExpectedSourceType)
return SDValue();
+
auto IndexNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (IndexNode == nullptr || IndexNode->getZExtValue() != 0)
return SDValue();
- unsigned Op = IntToFP->getOpcode() == ISD::SINT_TO_FP
- ? WebAssemblyISD::CONVERT_LOW_S
- : WebAssemblyISD::CONVERT_LOW_U;
-
+ auto Op = GetWasmConversionOp(ConversionOp);
return DAG.getNode(Op, SDLoc(N), ResVT, Source);
+ }
- } else {
+ // Combine this:
+ //
+ // (v2f64 ({s,u}int_to_fp
+ // (v2i32 (extract_subvector (v4i32 $x), 0))))
+ //
+ // into (f64x2.convert_low_i32x4_{s,u} $x).
+ //
+ // Or this:
+ //
+ // (v2f64 (fp_extend
+ // (v2f32 (extract_subvector (v4f32 $x), 0))))
+ //
+ // into (f64x2.promote_low_f32x4 $x).
+ auto ConversionOp = N->getOpcode();
+ MVT ExpectedExtractType;
+ MVT ExpectedSourceType;
+ switch (ConversionOp) {
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ ExpectedExtractType = MVT::v2i32;
+ ExpectedSourceType = MVT::v4i32;
+ break;
+ case ISD::FP_EXTEND:
+ ExpectedExtractType = MVT::v2f32;
+ ExpectedSourceType = MVT::v4f32;
+ break;
+ default:
llvm_unreachable("unexpected opcode");
}
+
+ auto Extract = N->getOperand(0);
+ if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR)
+ return SDValue();
+
+ if (Extract.getValueType() != ExpectedExtractType)
+ return SDValue();
+
+ auto Source = Extract.getOperand(0);
+ if (Source.getValueType() != ExpectedSourceType)
+ return SDValue();
+
+ auto *IndexNode = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
+ if (IndexNode == nullptr || IndexNode->getZExtValue() != 0)
+ return SDValue();
+
+ unsigned Op = GetWasmConversionOp(ConversionOp);
+ return DAG.getNode(Op, SDLoc(N), ResVT, Source);
}
static SDValue
@@ -2298,6 +2349,7 @@ WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
return performVectorExtendCombine(N, DCI);
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
+ case ISD::FP_EXTEND:
case ISD::EXTRACT_SUBVECTOR:
return performVectorConvertLowCombine(N, DCI);
case ISD::CONCAT_VECTORS:
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index d7058ff04936..c13ca11fe472 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -1288,11 +1288,13 @@ defm "" : SIMDConvert<I32x4, I16x8, int_wasm_extadd_pairwise_signed,
defm "" : SIMDConvert<I32x4, I16x8, int_wasm_extadd_pairwise_unsigned,
"extadd_pairwise_i16x8_u", 0x7f>;
-// Prototype f64x2 conversions
+// f64x2 <-> f32x4 conversions
defm "" : SIMDConvert<F32x4, F64x2, int_wasm_demote_zero,
"demote_zero_f64x2", 0x5e>;
-defm "" : SIMDConvert<F64x2, F32x4, int_wasm_promote_low,
- "promote_low_f32x4", 0x5f>;
+
+def promote_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
+def promote_low : SDNode<"WebAssemblyISD::PROMOTE_LOW", promote_t>;
+defm "" : SIMDConvert<F64x2, F32x4, promote_low, "promote_low_f32x4", 0x5f>;
//===----------------------------------------------------------------------===//
// Saturating Rounding Q-Format Multiplication
diff --git a/llvm/test/CodeGen/WebAssembly/simd-conversions.ll b/llvm/test/CodeGen/WebAssembly/simd-conversions.ll
index 94832a42d18e..97cff76fede0 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-conversions.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-conversions.ll
@@ -126,3 +126,25 @@ define <2 x double> @convert_low_u_v2f64_2(<4 x i32> %x) {
%a = shufflevector <4 x double> %v, <4 x double> undef, <2 x i32> <i32 0, i32 1>
ret <2 x double> %a
}
+
+; CHECK-LABEL: promote_low_v2f64:
+; NO-SIMD128-NOT: f64x2
+; SIMD128-NEXT: .functype promote_low_v2f64 (v128) -> (v128){{$}}
+; SIMD128-NEXT: f64x2.promote_low_f32x4 $push[[R:[0-9]+]]=, $0
+; SIMD128-NEXT: return $pop[[R]]
+define <2 x double> @promote_low_v2f64(<4 x float> %x) {
+ %v = shufflevector <4 x float> %x, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+ %a = fpext <2 x float> %v to <2 x double>
+ ret <2 x double> %a
+}
+
+; CHECK-LABEL: promote_low_v2f64_2:
+; NO-SIMD128-NOT: f64x2
+; SIMD128-NEXT: .functype promote_low_v2f64_2 (v128) -> (v128){{$}}
+; SIMD128-NEXT: f64x2.promote_low_f32x4 $push[[R:[0-9]+]]=, $0
+; SIMD128-NEXT: return $pop[[R]]
+define <2 x double> @promote_low_v2f64_2(<4 x float> %x) {
+ %v = fpext <4 x float> %x to <4 x double>
+ %a = shufflevector <4 x double> %v, <4 x double> undef, <2 x i32> <i32 0, i32 1>
+ ret <2 x double> %a
+}
diff --git a/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll b/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll
index 0fc008d3ef9a..3315338add08 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll
@@ -806,13 +806,3 @@ define <2 x double> @nearest_v2f64(<2 x double> %a) {
%v = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %a)
ret <2 x double> %v
}
-
-; CHECK-LABEL: promote_low_v2f64:
-; CHECK-NEXT: .functype promote_low_v2f64 (v128) -> (v128){{$}}
-; CHECK-NEXT: f64x2.promote_low_f32x4 $push[[R:[0-9]+]]=, $0{{$}}
-; CHECK-NEXT: return $pop[[R]]{{$}}
-declare <2 x double> @llvm.wasm.promote.low(<4 x float>)
-define <2 x double> @promote_low_v2f64(<4 x float> %a) {
- %v = call <2 x double> @llvm.wasm.promote.low(<4 x float> %a)
- ret <2 x double> %v
-}