[llvm] 2f2ae08 - [WebAssembly] Remove experimental SIMD instructions

Thomas Lively via llvm-commits llvm-commits@lists.llvm.org
Thu Mar 18 11:22:36 PDT 2021


Author: Thomas Lively
Date: 2021-03-18T11:21:24-07:00
New Revision: 2f2ae08da91dc5c188d5bb4d8b0b096d0a120a4a

URL: https://github.com/llvm/llvm-project/commit/2f2ae08da91dc5c188d5bb4d8b0b096d0a120a4a
DIFF: https://github.com/llvm/llvm-project/commit/2f2ae08da91dc5c188d5bb4d8b0b096d0a120a4a.diff

LOG: [WebAssembly] Remove experimental SIMD instructions

Removes the instruction definitions, intrinsics, and builtins for qfma/qfms,
signselect, and prefetch instructions, which were not included in the final
WebAssembly SIMD spec.

Depends on D98457.

Differential Revision: https://reviews.llvm.org/D98466

Added: 
    

Modified: 
    clang/include/clang/Basic/BuiltinsWebAssembly.def
    clang/lib/CodeGen/CGBuiltin.cpp
    clang/test/CodeGen/builtins-wasm.c
    llvm/include/llvm/IR/IntrinsicsWebAssembly.td
    llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
    llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
    llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
    llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll
    llvm/test/MC/WebAssembly/simd-encodings.s

Removed: 
    llvm/test/CodeGen/WebAssembly/simd-prefetch-offset.ll


################################################################################
diff --git a/clang/include/clang/Basic/BuiltinsWebAssembly.def b/clang/include/clang/Basic/BuiltinsWebAssembly.def
index 38de66587cba..2f51376ba15a 100644
--- a/clang/include/clang/Basic/BuiltinsWebAssembly.def
+++ b/clang/include/clang/Basic/BuiltinsWebAssembly.def
@@ -141,11 +141,6 @@ TARGET_BUILTIN(__builtin_wasm_extadd_pairwise_i16x8_u_i32x4, "V4UiV8Us", "nc", "
 
 TARGET_BUILTIN(__builtin_wasm_bitselect, "V4iV4iV4iV4i", "nc", "simd128")
 
-TARGET_BUILTIN(__builtin_wasm_signselect_i8x16, "V16ScV16ScV16ScV16Sc", "nc", "simd128")
-TARGET_BUILTIN(__builtin_wasm_signselect_i16x8, "V8sV8sV8sV8s", "nc", "simd128")
-TARGET_BUILTIN(__builtin_wasm_signselect_i32x4, "V4iV4iV4iV4i", "nc", "simd128")
-TARGET_BUILTIN(__builtin_wasm_signselect_i64x2, "V2LLiV2LLiV2LLiV2LLi", "nc", "simd128")
-
 TARGET_BUILTIN(__builtin_wasm_shuffle_v8x16, "V16ScV16ScV16ScIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIi", "nc", "simd128")
 
 TARGET_BUILTIN(__builtin_wasm_any_true_i8x16, "iV16Sc", "nc", "simd128")
@@ -188,11 +183,6 @@ TARGET_BUILTIN(__builtin_wasm_dot_s_i32x4_i16x8, "V4iV8sV8s", "nc", "simd128")
 TARGET_BUILTIN(__builtin_wasm_sqrt_f32x4, "V4fV4f", "nc", "simd128")
 TARGET_BUILTIN(__builtin_wasm_sqrt_f64x2, "V2dV2d", "nc", "simd128")
 
-TARGET_BUILTIN(__builtin_wasm_qfma_f32x4, "V4fV4fV4fV4f", "nc", "simd128")
-TARGET_BUILTIN(__builtin_wasm_qfms_f32x4, "V4fV4fV4fV4f", "nc", "simd128")
-TARGET_BUILTIN(__builtin_wasm_qfma_f64x2, "V2dV2dV2dV2d", "nc", "simd128")
-TARGET_BUILTIN(__builtin_wasm_qfms_f64x2, "V2dV2dV2dV2d", "nc", "simd128")
-
 TARGET_BUILTIN(__builtin_wasm_trunc_saturate_s_i32x4_f32x4, "V4iV4f", "nc", "simd128")
 TARGET_BUILTIN(__builtin_wasm_trunc_saturate_u_i32x4_f32x4, "V4iV4f", "nc", "simd128")
 
@@ -206,9 +196,6 @@ TARGET_BUILTIN(__builtin_wasm_widen_high_s_i32x4_i64x2, "V2LLiV4i", "nc", "simd1
 TARGET_BUILTIN(__builtin_wasm_widen_low_u_i32x4_i64x2, "V2LLUiV4Ui", "nc", "simd128")
 TARGET_BUILTIN(__builtin_wasm_widen_high_u_i32x4_i64x2, "V2LLUiV4Ui", "nc", "simd128")
 
-TARGET_BUILTIN(__builtin_wasm_widen_s_i8x16_i32x4, "V4iV16ScIi", "nc", "simd128")
-TARGET_BUILTIN(__builtin_wasm_widen_u_i8x16_i32x4, "V4UiV16UcIi", "nc", "simd128")
-
 TARGET_BUILTIN(__builtin_wasm_convert_low_s_i32x4_f64x2, "V2dV4i", "nc", "simd128")
 TARGET_BUILTIN(__builtin_wasm_convert_low_u_i32x4_f64x2, "V2dV4Ui", "nc", "simd128")
 TARGET_BUILTIN(__builtin_wasm_trunc_saturate_zero_s_f64x2_i32x4, "V4iV2d", "nc", "simd128")
@@ -230,8 +217,5 @@ TARGET_BUILTIN(__builtin_wasm_store64_lane, "vLLi*V2LLiIi", "n", "simd128")
 
 TARGET_BUILTIN(__builtin_wasm_eq_i64x2, "V2LLiV2LLiV2LLi", "nc", "simd128")
 
-TARGET_BUILTIN(__builtin_wasm_prefetch_t, "vv*", "n", "simd128")
-TARGET_BUILTIN(__builtin_wasm_prefetch_nt, "vv*", "n", "simd128")
-
 #undef BUILTIN
 #undef TARGET_BUILTIN

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 8d1d3c50870c..96df7b0d6222 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -17366,17 +17366,6 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
         CGM.getIntrinsic(Intrinsic::wasm_bitselect, ConvertType(E->getType()));
     return Builder.CreateCall(Callee, {V1, V2, C});
   }
-  case WebAssembly::BI__builtin_wasm_signselect_i8x16:
-  case WebAssembly::BI__builtin_wasm_signselect_i16x8:
-  case WebAssembly::BI__builtin_wasm_signselect_i32x4:
-  case WebAssembly::BI__builtin_wasm_signselect_i64x2: {
-    Value *V1 = EmitScalarExpr(E->getArg(0));
-    Value *V2 = EmitScalarExpr(E->getArg(1));
-    Value *C = EmitScalarExpr(E->getArg(2));
-    Function *Callee =
-        CGM.getIntrinsic(Intrinsic::wasm_signselect, ConvertType(E->getType()));
-    return Builder.CreateCall(Callee, {V1, V2, C});
-  }
   case WebAssembly::BI__builtin_wasm_dot_s_i32x4_i16x8: {
     Value *LHS = EmitScalarExpr(E->getArg(0));
     Value *RHS = EmitScalarExpr(E->getArg(1));
@@ -17444,29 +17433,6 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
     Function *Callee = CGM.getIntrinsic(Intrinsic::sqrt, Vec->getType());
     return Builder.CreateCall(Callee, {Vec});
   }
-  case WebAssembly::BI__builtin_wasm_qfma_f32x4:
-  case WebAssembly::BI__builtin_wasm_qfms_f32x4:
-  case WebAssembly::BI__builtin_wasm_qfma_f64x2:
-  case WebAssembly::BI__builtin_wasm_qfms_f64x2: {
-    Value *A = EmitScalarExpr(E->getArg(0));
-    Value *B = EmitScalarExpr(E->getArg(1));
-    Value *C = EmitScalarExpr(E->getArg(2));
-    unsigned IntNo;
-    switch (BuiltinID) {
-    case WebAssembly::BI__builtin_wasm_qfma_f32x4:
-    case WebAssembly::BI__builtin_wasm_qfma_f64x2:
-      IntNo = Intrinsic::wasm_qfma;
-      break;
-    case WebAssembly::BI__builtin_wasm_qfms_f32x4:
-    case WebAssembly::BI__builtin_wasm_qfms_f64x2:
-      IntNo = Intrinsic::wasm_qfms;
-      break;
-    default:
-      llvm_unreachable("unexpected builtin ID");
-    }
-    Function *Callee = CGM.getIntrinsic(IntNo, A->getType());
-    return Builder.CreateCall(Callee, {A, B, C});
-  }
   case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8:
   case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8:
   case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4:
@@ -17515,26 +17481,6 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
     Function *Callee = CGM.getIntrinsic(IntNo);
     return Builder.CreateCall(Callee, Vec);
   }
-  case WebAssembly::BI__builtin_wasm_widen_s_i8x16_i32x4:
-  case WebAssembly::BI__builtin_wasm_widen_u_i8x16_i32x4: {
-    Value *Vec = EmitScalarExpr(E->getArg(0));
-    llvm::APSInt SubVecConst =
-        *E->getArg(1)->getIntegerConstantExpr(getContext());
-    Value *SubVec = llvm::ConstantInt::get(getLLVMContext(), SubVecConst);
-    unsigned IntNo;
-    switch (BuiltinID) {
-    case WebAssembly::BI__builtin_wasm_widen_s_i8x16_i32x4:
-      IntNo = Intrinsic::wasm_widen_signed;
-      break;
-    case WebAssembly::BI__builtin_wasm_widen_u_i8x16_i32x4:
-      IntNo = Intrinsic::wasm_widen_unsigned;
-      break;
-    default:
-      llvm_unreachable("unexpected builtin ID");
-    }
-    Function *Callee = CGM.getIntrinsic(IntNo);
-    return Builder.CreateCall(Callee, {Vec, SubVec});
-  }
   case WebAssembly::BI__builtin_wasm_convert_low_s_i32x4_f64x2:
   case WebAssembly::BI__builtin_wasm_convert_low_u_i32x4_f64x2: {
     Value *Vec = EmitScalarExpr(E->getArg(0));
@@ -17649,16 +17595,6 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
     Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_shuffle);
     return Builder.CreateCall(Callee, Ops);
   }
-  case WebAssembly::BI__builtin_wasm_prefetch_t: {
-    Value *Ptr = EmitScalarExpr(E->getArg(0));
-    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_prefetch_t);
-    return Builder.CreateCall(Callee, Ptr);
-  }
-  case WebAssembly::BI__builtin_wasm_prefetch_nt: {
-    Value *Ptr = EmitScalarExpr(E->getArg(0));
-    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_prefetch_nt);
-    return Builder.CreateCall(Callee, Ptr);
-  }
   default:
     return nullptr;
   }

diff --git a/clang/test/CodeGen/builtins-wasm.c b/clang/test/CodeGen/builtins-wasm.c
index 124b09633693..71816ceda469 100644
--- a/clang/test/CodeGen/builtins-wasm.c
+++ b/clang/test/CodeGen/builtins-wasm.c
@@ -644,34 +644,6 @@ i32x4 bitselect(i32x4 x, i32x4 y, i32x4 c) {
   // WEBASSEMBLY-NEXT: ret
 }
 
-i8x16 signselect_i8x16(i8x16 x, i8x16 y, i8x16 c) {
-  return __builtin_wasm_signselect_i8x16(x, y, c);
-  // WEBASSEMBLY: call <16 x i8> @llvm.wasm.signselect.v16i8(
-  // WEBASSEMBLY-SAME: <16 x i8> %x, <16 x i8> %y, <16 x i8> %c)
-  // WEBASSEMBLY-NEXT: ret
-}
-
-i16x8 signselect_i16x8(i16x8 x, i16x8 y, i16x8 c) {
-  return __builtin_wasm_signselect_i16x8(x, y, c);
-  // WEBASSEMBLY: call <8 x i16> @llvm.wasm.signselect.v8i16(
-  // WEBASSEMBLY-SAME: <8 x i16> %x, <8 x i16> %y, <8 x i16> %c)
-  // WEBASSEMBLY-NEXT: ret
-}
-
-i32x4 signselect_i32x4(i32x4 x, i32x4 y, i32x4 c) {
-  return __builtin_wasm_signselect_i32x4(x, y, c);
-  // WEBASSEMBLY: call <4 x i32> @llvm.wasm.signselect.v4i32(
-  // WEBASSEMBLY-SAME: <4 x i32> %x, <4 x i32> %y, <4 x i32> %c)
-  // WEBASSEMBLY-NEXT: ret
-}
-
-i64x2 signselect_i64x2(i64x2 x, i64x2 y, i64x2 c) {
-  return __builtin_wasm_signselect_i64x2(x, y, c);
-  // WEBASSEMBLY: call <2 x i64> @llvm.wasm.signselect.v2i64(
-  // WEBASSEMBLY-SAME: <2 x i64> %x, <2 x i64> %y, <2 x i64> %c)
-  // WEBASSEMBLY-NEXT: ret
-}
-
 i8x16 popcnt(i8x16 x) {
   return __builtin_wasm_popcnt_i8x16(x);
   // WEBASSEMBLY: call <16 x i8> @llvm.wasm.popcnt(<16 x i8> %x)
@@ -884,34 +856,6 @@ f64x2 sqrt_f64x2(f64x2 x) {
   // WEBASSEMBLY: ret
 }
 
-f32x4 qfma_f32x4(f32x4 a, f32x4 b, f32x4 c) {
-  return __builtin_wasm_qfma_f32x4(a, b, c);
-  // WEBASSEMBLY: call <4 x float> @llvm.wasm.qfma.v4f32(
-  // WEBASSEMBLY-SAME: <4 x float> %a, <4 x float> %b, <4 x float> %c)
-  // WEBASSEMBLY-NEXT: ret
-}
-
-f32x4 qfms_f32x4(f32x4 a, f32x4 b, f32x4 c) {
-  return __builtin_wasm_qfms_f32x4(a, b, c);
-  // WEBASSEMBLY: call <4 x float> @llvm.wasm.qfms.v4f32(
-  // WEBASSEMBLY-SAME: <4 x float> %a, <4 x float> %b, <4 x float> %c)
-  // WEBASSEMBLY-NEXT: ret
-}
-
-f64x2 qfma_f64x2(f64x2 a, f64x2 b, f64x2 c) {
-  return __builtin_wasm_qfma_f64x2(a, b, c);
-  // WEBASSEMBLY: call <2 x double> @llvm.wasm.qfma.v2f64(
-  // WEBASSEMBLY-SAME: <2 x double> %a, <2 x double> %b, <2 x double> %c)
-  // WEBASSEMBLY-NEXT: ret
-}
-
-f64x2 qfms_f64x2(f64x2 a, f64x2 b, f64x2 c) {
-  return __builtin_wasm_qfms_f64x2(a, b, c);
-  // WEBASSEMBLY: call <2 x double> @llvm.wasm.qfms.v2f64(
-  // WEBASSEMBLY-SAME: <2 x double> %a, <2 x double> %b, <2 x double> %c)
-  // WEBASSEMBLY-NEXT: ret
-}
-
 i32x4 trunc_saturate_s_i32x4_f32x4(f32x4 f) {
   return __builtin_wasm_trunc_saturate_s_i32x4_f32x4(f);
   // WEBASSEMBLY: call <4 x i32> @llvm.wasm.trunc.saturate.signed.v4i32.v4f32(<4 x float> %f)
@@ -976,18 +920,6 @@ u64x2 widen_high_u_i32x4_i64x2(u32x4 x) {
   // WEBASSEMBLY: ret
 }
 
-i32x4 widen_s_i8x16_i32x4(i8x16 x) {
-  return __builtin_wasm_widen_s_i8x16_i32x4(x, 3);
-  // WEBASSEMBLY: call <4 x i32> @llvm.wasm.widen.signed(<16 x i8> %x, i32 3)
-  // WEBASSEMBLY: ret
-}
-
-u32x4 widen_u_i8x16_i32x4(u8x16 x) {
-  return __builtin_wasm_widen_u_i8x16_i32x4(x, 3);
-  // WEBASSEMBLY: call <4 x i32> @llvm.wasm.widen.unsigned(<16 x i8> %x, i32 3)
-  // WEBASSEMBLY: ret
-}
-
 f64x2 convert_low_s_i32x4_f64x2(i32x4 x) {
   return __builtin_wasm_convert_low_s_i32x4_f64x2(x);
   // WEBASSEMBLY: call <2 x double> @llvm.wasm.convert.low.signed(<4 x i32> %x)
@@ -1050,13 +982,3 @@ i8x16 shuffle(i8x16 x, i8x16 y) {
   // WEBASSEMBLY-SAME: i32 15
   // WEBASSEMBLY-NEXT: ret
 }
-
-void prefetch_t(void *p) {
-  return __builtin_wasm_prefetch_t(p);
-  // WEBASSEMBLY: call void @llvm.wasm.prefetch.t(i8* %p)
-}
-
-void prefetch_nt(void *p) {
-  return __builtin_wasm_prefetch_nt(p);
-  // WEBASSEMBLY: call void @llvm.wasm.prefetch.nt(i8* %p)
-}

diff --git a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
index 323b9a770c05..cd916e78f9f4 100644
--- a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
+++ b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
@@ -143,14 +143,6 @@ def int_wasm_bitmask :
   Intrinsic<[llvm_i32_ty],
             [llvm_anyvector_ty],
             [IntrNoMem, IntrSpeculatable]>;
-def int_wasm_qfma :
-  Intrinsic<[llvm_anyvector_ty],
-            [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
-            [IntrNoMem, IntrSpeculatable]>;
-def int_wasm_qfms :
-  Intrinsic<[llvm_anyvector_ty],
-            [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
-            [IntrNoMem, IntrSpeculatable]>;
 def int_wasm_dot :
   Intrinsic<[llvm_v4i32_ty],
             [llvm_v8i16_ty, llvm_v8i16_ty],
@@ -302,11 +294,6 @@ def int_wasm_extadd_pairwise_unsigned :
             [LLVMSubdivide2VectorType<0>],
             [IntrNoMem, IntrSpeculatable]>;
 
-def int_wasm_signselect :
-  Intrinsic<[llvm_anyvector_ty],
-            [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
-            [IntrNoMem, IntrSpeculatable]>;
-
 // TODO: Remove this intrinsic and the associated builtin if i64x2.eq gets
 // merged to the proposal.
 def int_wasm_eq :
@@ -314,20 +301,6 @@ def int_wasm_eq :
             [llvm_v2i64_ty, llvm_v2i64_ty],
             [IntrNoMem, IntrSpeculatable]>;
 
-// TODO: Remove this after experiments have been run. Use the target-agnostic
-// int_prefetch if this becomes specified at some point.
-def int_wasm_prefetch_t :
-  Intrinsic<[], [llvm_ptr_ty],
-            [IntrInaccessibleMemOrArgMemOnly, IntrWillReturn,
-             ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>],
-            "", [SDNPMemOperand]>;
-
-def int_wasm_prefetch_nt :
-  Intrinsic<[], [llvm_ptr_ty],
-            [IntrInaccessibleMemOrArgMemOnly, IntrWillReturn,
-             ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>],
-            "", [SDNPMemOperand]>;
-
 // TODO: Remove these if possible if they are merged to the spec.
 def int_wasm_convert_low_signed :
   Intrinsic<[llvm_v2f64_ty], [llvm_v4i32_ty],
@@ -348,14 +321,6 @@ def int_wasm_promote_low :
   Intrinsic<[llvm_v2f64_ty], [llvm_v4f32_ty],
             [IntrNoMem, IntrSpeculatable]>;
 
-// TODO: Remove these if possible if they are merged to the spec.
-def int_wasm_widen_signed :
-  Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty, llvm_i32_ty],
-            [IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<1>>]>;
-def int_wasm_widen_unsigned :
-  Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty, llvm_i32_ty],
-            [IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<1>>]>;
-
 //===----------------------------------------------------------------------===//
 // Thread-local storage intrinsics
 //===----------------------------------------------------------------------===//

diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
index 5b77b8495adf..3508ec0ba98f 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
@@ -196,8 +196,6 @@ inline unsigned GetDefaultP2AlignAny(unsigned Opc) {
   WASM_LOAD_STORE(LOAD8_SPLAT)
   WASM_LOAD_STORE(LOAD_LANE_I8x16)
   WASM_LOAD_STORE(STORE_LANE_I8x16)
-  WASM_LOAD_STORE(PREFETCH_T)
-  WASM_LOAD_STORE(PREFETCH_NT)
   return 0;
   WASM_LOAD_STORE(LOAD16_S_I32)
   WASM_LOAD_STORE(LOAD16_U_I32)

diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 8cf44b545e06..f28fe67b0b46 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -761,16 +761,6 @@ bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
     Info.align = MemAlign;
     return true;
   }
-  case Intrinsic::wasm_prefetch_t:
-  case Intrinsic::wasm_prefetch_nt: {
-    Info.opc = ISD::INTRINSIC_VOID;
-    Info.memVT = MVT::i8;
-    Info.ptrVal = I.getArgOperand(0);
-    Info.offset = 0;
-    Info.align = Align(1);
-    Info.flags = MachineMemOperand::MOLoad;
-    return true;
-  }
   default:
     return false;
   }

diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index d1f8cf4f5c15..83f29acf6348 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -791,22 +791,6 @@ def : Pat<(select
           (SELECT_V128 $rhs, $lhs, $cond)>;
 } // foreach vec
 
-// Sign select
-multiclass SIMDSignSelect<Vec vec, bits<32> simdop> {
-  defm SIGNSELECT_#vec :
-    SIMD_I<(outs V128:$dst), (ins V128:$v1, V128:$v2, V128:$c), (outs), (ins),
-           [(set (vec.vt V128:$dst),
-             (vec.vt (int_wasm_signselect
-               (vec.vt V128:$v1), (vec.vt V128:$v2), (vec.vt V128:$c))))],
-           vec.prefix#".signselect\t$dst, $v1, $v2, $c",
-           vec.prefix#".signselect", simdop>;
-}
-
-defm : SIMDSignSelect<I8x16, 125>;
-defm : SIMDSignSelect<I16x8, 126>;
-defm : SIMDSignSelect<I32x4, 127>;
-defm : SIMDSignSelect<I64x2, 148>;
-
 //===----------------------------------------------------------------------===//
 // Integer unary arithmetic
 //===----------------------------------------------------------------------===//
@@ -1270,90 +1254,9 @@ defm "" : SIMDConvert<F32x4, F64x2, int_wasm_demote_zero,
 defm "" : SIMDConvert<F64x2, F32x4, int_wasm_promote_low,
                       "promote_low_f32x4", 0x69>;
 
-// Prototype i8x16 to i32x4 widening
-defm WIDEN_I8x16_TO_I32x4_S :
-  SIMD_I<(outs V128:$dst), (ins V128:$vec, vec_i8imm_op:$idx),
-         (outs), (ins vec_i8imm_op:$idx),
-         [(set (I32x4.vt V128:$dst),
-            (I32x4.vt (int_wasm_widen_signed
-              (I8x16.vt V128:$vec), (i32 timm:$idx))))],
-         "i32x4.widen_i8x16_s\t$dst, $vec, $idx",
-         "i32x4.widen_i8x16_s\t$idx", 0x67>;
-defm WIDEN_I8x16_TO_I32x4_U :
-  SIMD_I<(outs V128:$dst), (ins V128:$vec, vec_i8imm_op:$idx),
-         (outs), (ins vec_i8imm_op:$idx),
-         [(set (I32x4.vt V128:$dst),
-            (I32x4.vt (int_wasm_widen_unsigned
-              (I8x16.vt V128:$vec), (i32 timm:$idx))))],
-         "i32x4.widen_i8x16_u\t$dst, $vec, $idx",
-         "i32x4.widen_i8x16_u\t$idx", 0x68>;
-
-
-//===----------------------------------------------------------------------===//
-// Quasi-Fused Multiply- Add and Subtract (QFMA/QFMS)
-//===----------------------------------------------------------------------===//
-
-multiclass SIMDQFM<Vec vec, bits<32> simdopA, bits<32> simdopS> {
-  defm QFMA_#vec :
-    SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c),
-           (outs), (ins),
-           [(set (vec.vt V128:$dst), (int_wasm_qfma
-              (vec.vt V128:$a), (vec.vt V128:$b), (vec.vt V128:$c)))],
-           vec.prefix#".qfma\t$dst, $a, $b, $c", vec.prefix#".qfma", simdopA>;
-  defm QFMS_#vec :
-    SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c),
-           (outs), (ins),
-           [(set (vec.vt V128:$dst), (int_wasm_qfms
-              (vec.vt V128:$a), (vec.vt V128:$b), (vec.vt V128:$c)))],
-           vec.prefix#".qfms\t$dst, $a, $b, $c", vec.prefix#".qfms", simdopS>;
-}
-
-defm "" : SIMDQFM<F32x4, 180, 212>;
-defm "" : SIMDQFM<F64x2, 254, 255>;
-
 //===----------------------------------------------------------------------===//
 // Saturating Rounding Q-Format Multiplication
 //===----------------------------------------------------------------------===//
 
 defm Q15MULR_SAT_S :
   SIMDBinary<I16x8, int_wasm_q15mulr_saturate_signed, "q15mulr_sat_s", 156>;
-
-//===----------------------------------------------------------------------===//
-// Experimental prefetch instructions: prefetch.t, prefetch.nt
-//===----------------------------------------------------------------------===//
-
-let mayLoad = true, UseNamedOperandTable = true in {
-defm PREFETCH_T_A32 :
-  SIMD_I<(outs), (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
-         (outs), (ins P2Align:$p2align, offset32_op:$off), [],
-         "prefetch.t\t${off}(${addr})$p2align",
-         "prefetch.t\t$off$p2align", 0xc5>;
-defm PREFETCH_T_A64 :
-  SIMD_I<(outs), (ins P2Align:$p2align, offset64_op:$off, I64:$addr),
-         (outs), (ins P2Align:$p2align, offset64_op:$off), [],
-         "prefetch.t\t${off}(${addr})$p2align",
-         "prefetch.t\t$off$p2align", 0xc5>;
-defm PREFETCH_NT_A32 :
-  SIMD_I<(outs), (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
-         (outs), (ins P2Align:$p2align, offset32_op:$off), [],
-         "prefetch.nt\t${off}(${addr})$p2align",
-         "prefetch.nt\t$off$p2align", 0xc6>;
-defm PREFETCH_NT_A64 :
-  SIMD_I<(outs), (ins P2Align:$p2align, offset64_op:$off, I64:$addr),
-         (outs), (ins P2Align:$p2align, offset64_op:$off), [],
-         "prefetch.nt\t${off}(${addr})$p2align",
-         "prefetch.nt\t$off$p2align", 0xc6>;
-} // mayLoad, UseNamedOperandTable
-
-multiclass PrefetchPatNoOffset<Intrinsic kind, string inst> {
-  def : Pat<(kind I32:$addr), (!cast<NI>(inst # "_A32") 0, 0, $addr)>,
-        Requires<[HasAddr32]>;
-  def : Pat<(kind I64:$addr), (!cast<NI>(inst # "_A64") 0, 0, $addr)>,
-        Requires<[HasAddr64]>;
-}
-
-foreach inst = [["PREFETCH_T", "int_wasm_prefetch_t"],
-                ["PREFETCH_NT", "int_wasm_prefetch_nt"]] in {
-defvar node = !cast<Intrinsic>(inst[1]);
-defm : PrefetchPatNoOffset<node, inst[0]>;
-}

diff --git a/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll b/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll
index a3b0d50903f6..606b8b6753d1 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll
@@ -127,18 +127,6 @@ define <16 x i8> @bitselect_v16i8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %c) {
   ret <16 x i8> %a
 }
 
-; CHECK-LABEL: signselect_v16i8:
-; CHECK-NEXT: .functype signselect_v16i8 (v128, v128, v128) -> (v128){{$}}
-; CHECK-NEXT: i8x16.signselect $push[[R:[0-9]+]]=, $0, $1, $2{{$}}
-; CHECK-NEXT: return $pop[[R]]{{$}}
-declare <16 x i8> @llvm.wasm.signselect.v16i8(<16 x i8>, <16 x i8>, <16 x i8>)
-define <16 x i8> @signselect_v16i8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %c) {
-  %a = call <16 x i8> @llvm.wasm.signselect.v16i8(
-     <16 x i8> %v1, <16 x i8> %v2, <16 x i8> %c
-  )
-  ret <16 x i8> %a
-}
-
 ; CHECK-LABEL: narrow_signed_v16i8:
 ; CHECK-NEXT: .functype narrow_signed_v16i8 (v128, v128) -> (v128){{$}}
 ; CHECK-NEXT: i8x16.narrow_i16x8_s $push[[R:[0-9]+]]=, $0, $1{{$}}
@@ -371,18 +359,6 @@ define <8 x i16> @bitselect_v8i16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %c) {
   ret <8 x i16> %a
 }
 
-; CHECK-LABEL: signselect_v8i16:
-; CHECK-NEXT: .functype signselect_v8i16 (v128, v128, v128) -> (v128){{$}}
-; CHECK-NEXT: i16x8.signselect $push[[R:[0-9]+]]=, $0, $1, $2{{$}}
-; CHECK-NEXT: return $pop[[R]]{{$}}
-declare <8 x i16> @llvm.wasm.signselect.v8i16(<8 x i16>, <8 x i16>, <8 x i16>)
-define <8 x i16> @signselect_v8i16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %c) {
-  %a = call <8 x i16> @llvm.wasm.signselect.v8i16(
-    <8 x i16> %v1, <8 x i16> %v2, <8 x i16> %c
-  )
-  ret <8 x i16> %a
-}
-
 ; CHECK-LABEL: narrow_signed_v8i16:
 ; CHECK-NEXT: .functype narrow_signed_v8i16 (v128, v128) -> (v128){{$}}
 ; CHECK-NEXT: i16x8.narrow_i32x4_s $push[[R:[0-9]+]]=, $0, $1{{$}}
@@ -532,18 +508,6 @@ define <4 x i32> @bitselect_v4i32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %c) {
   ret <4 x i32> %a
 }
 
-; CHECK-LABEL: signselect_v4i32:
-; CHECK-NEXT: .functype signselect_v4i32 (v128, v128, v128) -> (v128){{$}}
-; CHECK-NEXT: i32x4.signselect $push[[R:[0-9]+]]=, $0, $1, $2{{$}}
-; CHECK-NEXT: return $pop[[R]]{{$}}
-declare <4 x i32> @llvm.wasm.signselect.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
-define <4 x i32> @signselect_v4i32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %c) {
-  %a = call <4 x i32> @llvm.wasm.signselect.v4i32(
-    <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %c
-  )
-  ret <4 x i32> %a
-}
-
 ; CHECK-LABEL: trunc_sat_s_v4i32:
 ; NO-CHECK-NOT: f32x4
 ; CHECK-NEXT: .functype trunc_sat_s_v4i32 (v128) -> (v128){{$}}
@@ -586,27 +550,6 @@ define <4 x i32> @trunc_sat_zero_unsigned_v4i32(<2 x double> %a) {
   ret <4 x i32> %v
 }
 
-
-; CHECK-LABEL: widen_signed_v4i32:
-; CHECK-NEXT: .functype widen_signed_v4i32 (v128) -> (v128){{$}}
-; CHECK-NEXT: i32x4.widen_i8x16_s $push[[R:[0-9]+]]=, $0, 1{{$}}
-; CHECK-NEXT: return $pop[[R]]{{$}}
-declare <4 x i32> @llvm.wasm.widen.signed(<16 x i8>, i32 immarg)
-define <4 x i32> @widen_signed_v4i32(<16 x i8> %x) {
-  %v = call <4 x i32> @llvm.wasm.widen.signed(<16 x i8> %x, i32 1)
-  ret <4 x i32> %v
-}
-
-; CHECK-LABEL: widen_unsigned_v4i32:
-; CHECK-NEXT: .functype widen_unsigned_v4i32 (v128) -> (v128){{$}}
-; CHECK-NEXT: i32x4.widen_i8x16_u $push[[R:[0-9]+]]=, $0, 1{{$}}
-; CHECK-NEXT: return $pop[[R]]{{$}}
-declare <4 x i32> @llvm.wasm.widen.unsigned(<16 x i8>, i32 immarg)
-define <4 x i32> @widen_unsigned_v4i32(<16 x i8> %x) {
-  %v = call <4 x i32> @llvm.wasm.widen.unsigned(<16 x i8> %x, i32 1)
-  ret <4 x i32> %v
-}
-
 ; ==============================================================================
 ; 2 x i64
 ; ==============================================================================
@@ -750,18 +693,6 @@ define <2 x i64> @bitselect_v2i64(<2 x i64> %v1, <2 x i64> %v2, <2 x i64> %c) {
   ret <2 x i64> %a
 }
 
-; CHECK-LABEL: signselect_v2i64:
-; CHECK-NEXT: .functype signselect_v2i64 (v128, v128, v128) -> (v128){{$}}
-; CHECK-NEXT: i64x2.signselect $push[[R:[0-9]+]]=, $0, $1, $2{{$}}
-; CHECK-NEXT: return $pop[[R]]{{$}}
-declare <2 x i64> @llvm.wasm.signselect.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)
-define <2 x i64> @signselect_v2i64(<2 x i64> %v1, <2 x i64> %v2, <2 x i64> %c) {
-  %a = call <2 x i64> @llvm.wasm.signselect.v2i64(
-    <2 x i64> %v1, <2 x i64> %v2, <2 x i64> %c
-  )
-  ret <2 x i64> %a
-}
-
 ; ==============================================================================
 ; 4 x f32
 ; ==============================================================================
@@ -837,30 +768,6 @@ define <4 x float> @nearest_v4f32(<4 x float> %a) {
   ret <4 x float> %v
 }
 
-; CHECK-LABEL: qfma_v4f32:
-; CHECK-NEXT: .functype qfma_v4f32 (v128, v128, v128) -> (v128){{$}}
-; CHECK-NEXT: f32x4.qfma $push[[R:[0-9]+]]=, $0, $1, $2{{$}}
-; CHECK-NEXT: return $pop[[R]]{{$}}
-declare <4 x float> @llvm.wasm.qfma.v4f32(<4 x float>, <4 x float>, <4 x float>)
-define <4 x float> @qfma_v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
-  %v = call <4 x float> @llvm.wasm.qfma.v4f32(
-    <4 x float> %a, <4 x float> %b, <4 x float> %c
-  )
-  ret <4 x float> %v
-}
-
-; CHECK-LABEL: qfms_v4f32:
-; CHECK-NEXT: .functype qfms_v4f32 (v128, v128, v128) -> (v128){{$}}
-; CHECK-NEXT: f32x4.qfms $push[[R:[0-9]+]]=, $0, $1, $2{{$}}
-; CHECK-NEXT: return $pop[[R]]{{$}}
-declare <4 x float> @llvm.wasm.qfms.v4f32(<4 x float>, <4 x float>, <4 x float>)
-define <4 x float> @qfms_v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
-  %v = call <4 x float> @llvm.wasm.qfms.v4f32(
-    <4 x float> %a, <4 x float> %b, <4 x float> %c
-  )
-  ret <4 x float> %v
-}
-
 ; CHECK-LABEL: demote_zero_v4f32:
 ; CHECK-NEXT: .functype demote_zero_v4f32 (v128) -> (v128){{$}}
 ; CHECK-NEXT: f32x4.demote_zero_f64x2 $push[[R:[0-9]+]]=, $0{{$}}
@@ -946,30 +853,6 @@ define <2 x double> @nearest_v2f64(<2 x double> %a) {
   ret <2 x double> %v
 }
 
-; CHECK-LABEL: qfma_v2f64:
-; CHECK-NEXT: .functype qfma_v2f64 (v128, v128, v128) -> (v128){{$}}
-; CHECK-NEXT: f64x2.qfma $push[[R:[0-9]+]]=, $0, $1, $2{{$}}
-; CHECK-NEXT: return $pop[[R]]{{$}}
-declare <2 x double> @llvm.wasm.qfma.v2f64(<2 x double>, <2 x double>, <2 x double>)
-define <2 x double> @qfma_v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
-  %v = call <2 x double> @llvm.wasm.qfma.v2f64(
-    <2 x double> %a, <2 x double> %b, <2 x double> %c
-  )
-  ret <2 x double> %v
-}
-
-; CHECK-LABEL: qfms_v2f64:
-; CHECK-NEXT: .functype qfms_v2f64 (v128, v128, v128) -> (v128){{$}}
-; CHECK-NEXT: f64x2.qfms $push[[R:[0-9]+]]=, $0, $1, $2{{$}}
-; CHECK-NEXT: return $pop[[R]]{{$}}
-declare <2 x double> @llvm.wasm.qfms.v2f64(<2 x double>, <2 x double>, <2 x double>)
-define <2 x double> @qfms_v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
-  %v = call <2 x double> @llvm.wasm.qfms.v2f64(
-    <2 x double> %a, <2 x double> %b, <2 x double> %c
-  )
-  ret <2 x double> %v
-}
-
 ; CHECK-LABEL: convert_low_signed_v2f64:
 ; CHECK-NEXT: .functype convert_low_signed_v2f64 (v128) -> (v128){{$}}
 ; CHECK-NEXT: f64x2.convert_low_i32x4_s $push[[R:[0-9]+]]=, $0{{$}}

diff --git a/llvm/test/CodeGen/WebAssembly/simd-prefetch-offset.ll b/llvm/test/CodeGen/WebAssembly/simd-prefetch-offset.ll
deleted file mode 100644
index f3b54481c0e4..000000000000
--- a/llvm/test/CodeGen/WebAssembly/simd-prefetch-offset.ll
+++ /dev/null
@@ -1,235 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -verify-machineinstrs -mattr=+simd128 | FileCheck %s
-
-; Test experimental prefetch instructions
-
-target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
-target triple = "wasm32-unknown-unknown"
-
-declare void @llvm.wasm.prefetch.t(i8*)
-declare void @llvm.wasm.prefetch.nt(i8*)
-@gv = global i8 0
-
-;===----------------------------------------------------------------------------
-; prefetch.t
-;===----------------------------------------------------------------------------
-
-define void @prefetch_t_no_offset(i8* %p) {
-; CHECK-LABEL: prefetch_t_no_offset:
-; CHECK:         .functype prefetch_t_no_offset (i32) -> ()
-; CHECK-NEXT:  # %bb.0:
-; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    prefetch.t 0
-; CHECK-NEXT:    # fallthrough-return
-  tail call void @llvm.wasm.prefetch.t(i8* %p)
-  ret void
-}
-
-define void @prefetch_t_with_folded_offset(i8* %p) {
-; CHECK-LABEL: prefetch_t_with_folded_offset:
-; CHECK:         .functype prefetch_t_with_folded_offset (i32) -> ()
-; CHECK-NEXT:  # %bb.0:
-; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    i32.const 24
-; CHECK-NEXT:    i32.add
-; CHECK-NEXT:    prefetch.t 0
-; CHECK-NEXT:    # fallthrough-return
-  %q = ptrtoint i8* %p to i32
-  %r = add nuw i32 %q, 24
-  %s = inttoptr i32 %r to i8*
-  tail call void @llvm.wasm.prefetch.t(i8* %s)
-  ret void
-}
-
-define void @prefetch_t_with_folded_gep_offset(i8* %p) {
-; CHECK-LABEL: prefetch_t_with_folded_gep_offset:
-; CHECK:         .functype prefetch_t_with_folded_gep_offset (i32) -> ()
-; CHECK-NEXT:  # %bb.0:
-; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    i32.const 6
-; CHECK-NEXT:    i32.add
-; CHECK-NEXT:    prefetch.t 0
-; CHECK-NEXT:    # fallthrough-return
-  %s = getelementptr inbounds i8, i8* %p, i32 6
-  tail call void @llvm.wasm.prefetch.t(i8* %s)
-  ret void
-}
-
-define void @prefetch_t_with_unfolded_gep_negative_offset(i8* %p) {
-; CHECK-LABEL: prefetch_t_with_unfolded_gep_negative_offset:
-; CHECK:         .functype prefetch_t_with_unfolded_gep_negative_offset (i32) -> ()
-; CHECK-NEXT:  # %bb.0:
-; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    i32.const -6
-; CHECK-NEXT:    i32.add
-; CHECK-NEXT:    prefetch.t 0
-; CHECK-NEXT:    # fallthrough-return
-  %s = getelementptr inbounds i8, i8* %p, i32 -6
-  tail call void @llvm.wasm.prefetch.t(i8* %s)
-  ret void
-}
-
-define void @prefetch_t_with_unfolded_offset(i8* %p) {
-; CHECK-LABEL: prefetch_t_with_unfolded_offset:
-; CHECK:         .functype prefetch_t_with_unfolded_offset (i32) -> ()
-; CHECK-NEXT:  # %bb.0:
-; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    i32.const 24
-; CHECK-NEXT:    i32.add
-; CHECK-NEXT:    prefetch.t 0
-; CHECK-NEXT:    # fallthrough-return
-  %q = ptrtoint i8* %p to i32
-  %r = add nsw i32 %q, 24
-  %s = inttoptr i32 %r to i8*
-  tail call void @llvm.wasm.prefetch.t(i8* %s)
-  ret void
-}
-
-define void @prefetch_t_with_unfolded_gep_offset(i8* %p) {
-; CHECK-LABEL: prefetch_t_with_unfolded_gep_offset:
-; CHECK:         .functype prefetch_t_with_unfolded_gep_offset (i32) -> ()
-; CHECK-NEXT:  # %bb.0:
-; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    i32.const 6
-; CHECK-NEXT:    i32.add
-; CHECK-NEXT:    prefetch.t 0
-; CHECK-NEXT:    # fallthrough-return
-  %s = getelementptr i8, i8* %p, i32 6
-  tail call void @llvm.wasm.prefetch.t(i8* %s)
-  ret void
-}
-
-define void @prefetch_t_from_numeric_address() {
-; CHECK-LABEL: prefetch_t_from_numeric_address:
-; CHECK:         .functype prefetch_t_from_numeric_address () -> ()
-; CHECK-NEXT:  # %bb.0:
-; CHECK-NEXT:    i32.const 42
-; CHECK-NEXT:    prefetch.t 0
-; CHECK-NEXT:    # fallthrough-return
-  %s = inttoptr i32 42 to i8*
-  tail call void @llvm.wasm.prefetch.t(i8* %s)
-  ret void
-}
-
-define void @prefetch_t_from_global_address() {
-; CHECK-LABEL: prefetch_t_from_global_address:
-; CHECK:         .functype prefetch_t_from_global_address () -> ()
-; CHECK-NEXT:  # %bb.0:
-; CHECK-NEXT:    i32.const gv
-; CHECK-NEXT:    prefetch.t 0
-; CHECK-NEXT:    # fallthrough-return
-  tail call void @llvm.wasm.prefetch.t(i8* @gv)
-  ret void
-}
-
-;===----------------------------------------------------------------------------
-; prefetch.nt
-;===----------------------------------------------------------------------------
-
-define void @prefetch_nt_no_offset(i8* %p) {
-; CHECK-LABEL: prefetch_nt_no_offset:
-; CHECK:         .functype prefetch_nt_no_offset (i32) -> ()
-; CHECK-NEXT:  # %bb.0:
-; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    prefetch.nt 0
-; CHECK-NEXT:    # fallthrough-return
-  tail call void @llvm.wasm.prefetch.nt(i8* %p)
-  ret void
-}
-
-define void @prefetch_nt_with_folded_offset(i8* %p) {
-; CHECK-LABEL: prefetch_nt_with_folded_offset:
-; CHECK:         .functype prefetch_nt_with_folded_offset (i32) -> ()
-; CHECK-NEXT:  # %bb.0:
-; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    i32.const 24
-; CHECK-NEXT:    i32.add
-; CHECK-NEXT:    prefetch.nt 0
-; CHECK-NEXT:    # fallthrough-return
-  %q = ptrtoint i8* %p to i32
-  %r = add nuw i32 %q, 24
-  %s = inttoptr i32 %r to i8*
-  tail call void @llvm.wasm.prefetch.nt(i8* %s)
-  ret void
-}
-
-define void @prefetch_nt_with_folded_gep_offset(i8* %p) {
-; CHECK-LABEL: prefetch_nt_with_folded_gep_offset:
-; CHECK:         .functype prefetch_nt_with_folded_gep_offset (i32) -> ()
-; CHECK-NEXT:  # %bb.0:
-; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    i32.const 6
-; CHECK-NEXT:    i32.add
-; CHECK-NEXT:    prefetch.nt 0
-; CHECK-NEXT:    # fallthrough-return
-  %s = getelementptr inbounds i8, i8* %p, i64 6
-  tail call void @llvm.wasm.prefetch.nt(i8* %s)
-  ret void
-}
-
-define void @prefetch_nt_with_unfolded_gep_negative_offset(i8* %p) {
-; CHECK-LABEL: prefetch_nt_with_unfolded_gep_negative_offset:
-; CHECK:         .functype prefetch_nt_with_unfolded_gep_negative_offset (i32) -> ()
-; CHECK-NEXT:  # %bb.0:
-; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    i32.const -6
-; CHECK-NEXT:    i32.add
-; CHECK-NEXT:    prefetch.nt 0
-; CHECK-NEXT:    # fallthrough-return
-  %s = getelementptr inbounds i8, i8* %p, i64 -6
-  tail call void @llvm.wasm.prefetch.nt(i8* %s)
-  ret void
-}
-
-define void @prefetch_nt_with_unfolded_offset(i8* %p) {
-; CHECK-LABEL: prefetch_nt_with_unfolded_offset:
-; CHECK:         .functype prefetch_nt_with_unfolded_offset (i32) -> ()
-; CHECK-NEXT:  # %bb.0:
-; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    i32.const 24
-; CHECK-NEXT:    i32.add
-; CHECK-NEXT:    prefetch.nt 0
-; CHECK-NEXT:    # fallthrough-return
-  %q = ptrtoint i8* %p to i32
-  %r = add nsw i32 %q, 24
-  %s = inttoptr i32 %r to i8*
-  tail call void @llvm.wasm.prefetch.nt(i8* %s)
-  ret void
-}
-
-define void @prefetch_nt_with_unfolded_gep_offset(i8* %p) {
-; CHECK-LABEL: prefetch_nt_with_unfolded_gep_offset:
-; CHECK:         .functype prefetch_nt_with_unfolded_gep_offset (i32) -> ()
-; CHECK-NEXT:  # %bb.0:
-; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    i32.const 6
-; CHECK-NEXT:    i32.add
-; CHECK-NEXT:    prefetch.nt 0
-; CHECK-NEXT:    # fallthrough-return
-  %s = getelementptr i8, i8* %p, i64 6
-  tail call void @llvm.wasm.prefetch.nt(i8* %s)
-  ret void
-}
-
-define void @prefetch_nt_from_numeric_address() {
-; CHECK-LABEL: prefetch_nt_from_numeric_address:
-; CHECK:         .functype prefetch_nt_from_numeric_address () -> ()
-; CHECK-NEXT:  # %bb.0:
-; CHECK-NEXT:    i32.const 42
-; CHECK-NEXT:    prefetch.nt 0
-; CHECK-NEXT:    # fallthrough-return
-  %s = inttoptr i32 42 to i8*
-  tail call void @llvm.wasm.prefetch.nt(i8* %s)
-  ret void
-}
-
-define void @prefetch_nt_from_global_address() {
-; CHECK-LABEL: prefetch_nt_from_global_address:
-; CHECK:         .functype prefetch_nt_from_global_address () -> ()
-; CHECK-NEXT:  # %bb.0:
-; CHECK-NEXT:    i32.const gv
-; CHECK-NEXT:    prefetch.nt 0
-; CHECK-NEXT:    # fallthrough-return
-  tail call void @llvm.wasm.prefetch.nt(i8* @gv)
-  ret void
-}

diff  --git a/llvm/test/MC/WebAssembly/simd-encodings.s b/llvm/test/MC/WebAssembly/simd-encodings.s
index 4ecf5e487665..f9f4a553a63d 100644
--- a/llvm/test/MC/WebAssembly/simd-encodings.s
+++ b/llvm/test/MC/WebAssembly/simd-encodings.s
@@ -664,18 +664,6 @@ main:
     # CHECK: v128.load64_zero 32 # encoding: [0xfd,0xfd,0x01,0x03,0x20]
     v128.load64_zero 32
 
-    # CHECK: f32x4.qfma # encoding: [0xfd,0xb4,0x01]
-    f32x4.qfma
-
-    # CHECK: f32x4.qfms # encoding: [0xfd,0xd4,0x01]
-    f32x4.qfms
-
-    # CHECK: f64x2.qfma # encoding: [0xfd,0xfe,0x01]
-    f64x2.qfma
-
-    # CHECK: f64x2.qfms # encoding: [0xfd,0xff,0x01]
-    f64x2.qfms
-
     # CHECK: i16x8.extmul_low_i8x16_s # encoding: [0xfd,0x9a,0x01]
     i16x8.extmul_low_i8x16_s
 
@@ -712,18 +700,6 @@ main:
     # CHECK: i64x2.extmul_high_i32x4_u # encoding: [0xfd,0xd7,0x01]
     i64x2.extmul_high_i32x4_u
 
-    # CHECK: i8x16.signselect # encoding: [0xfd,0x7d]
-    i8x16.signselect
-
-    # CHECK: i16x8.signselect # encoding: [0xfd,0x7e]
-    i16x8.signselect
-
-    # CHECK: i32x4.signselect # encoding: [0xfd,0x7f]
-    i32x4.signselect
-
-    # CHECK: i64x2.signselect # encoding: [0xfd,0x94,0x01]
-    i64x2.signselect
-
     # CHECK: i16x8.extadd_pairwise_i8x16_s # encoding: [0xfd,0xc2,0x01]
     i16x8.extadd_pairwise_i8x16_s
 
@@ -736,12 +712,6 @@ main:
     # CHECK: i32x4.extadd_pairwise_i16x8_u # encoding: [0xfd,0xa6,0x01]
     i32x4.extadd_pairwise_i16x8_u
 
-    # CHECK: prefetch.t 16 # encoding: [0xfd,0xc5,0x01,0x00,0x10]
-    prefetch.t 16
-
-    # CHECK: prefetch.nt 16 # encoding: [0xfd,0xc6,0x01,0x00,0x10]
-    prefetch.nt 16
-
     # CHECK: f64x2.convert_low_i32x4_s # encoding: [0xfd,0x53]
     f64x2.convert_low_i32x4_s
 
@@ -760,10 +730,4 @@ main:
     # CHECK: f64x2.promote_low_f32x4 # encoding: [0xfd,0x69]
     f64x2.promote_low_f32x4
 
-    # CHECK: i32x4.widen_i8x16_s 3 # encoding: [0xfd,0x67,0x03]
-    i32x4.widen_i8x16_s 3
-
-    # CHECK: i32x4.widen_i8x16_u 3 # encoding: [0xfd,0x68,0x03]
-    i32x4.widen_i8x16_u 3
-
     end_function


        


More information about the llvm-commits mailing list