[llvm] 6a18cc2 - [WebAssembly] Codegen for i64x2.extend_{low,high}_i32x4_{s,u}

Wed Apr 14 13:43:15 PDT 2021

Author: Thomas Lively
Date: 2021-04-14T13:43:09-07:00
New Revision: 6a18cc23efad410db48a3ccfc233d215de7d4cb9

URL: https://github.com/llvm/llvm-project/commit/6a18cc23efad410db48a3ccfc233d215de7d4cb9
DIFF: https://github.com/llvm/llvm-project/commit/6a18cc23efad410db48a3ccfc233d215de7d4cb9.diff

LOG: [WebAssembly] Codegen for i64x2.extend_{low,high}_i32x4_{s,u}

Removes the builtins and intrinsics that were used to opt in to these
instructions and replaces them with normal ISel patterns, now that the
instructions are no longer prototypes.

Differential Revision: https://reviews.llvm.org/D100402

Added: 
    

Modified: 
    clang/include/clang/Basic/BuiltinsWebAssembly.def
    clang/lib/CodeGen/CGBuiltin.cpp
    clang/test/CodeGen/builtins-wasm.c
    llvm/include/llvm/IR/IntrinsicsWebAssembly.td
    llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
    llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
    llvm/test/CodeGen/WebAssembly/simd-extending.ll
    llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/BuiltinsWebAssembly.def b/clang/include/clang/Basic/BuiltinsWebAssembly.def
index db8ec8ebeb30..bc0c37a11207 100644

--- a/clang/include/clang/Basic/BuiltinsWebAssembly.def
+++ b/clang/include/clang/Basic/BuiltinsWebAssembly.def
@@ -191,11 +191,6 @@ TARGET_BUILTIN(__builtin_wasm_narrow_u_i8x16_i16x8, "V16UcV8UsV8Us", "nc", "simd
 TARGET_BUILTIN(__builtin_wasm_narrow_s_i16x8_i32x4, "V8sV4iV4i", "nc", "simd128")
 TARGET_BUILTIN(__builtin_wasm_narrow_u_i16x8_i32x4, "V8UsV4UiV4Ui", "nc", "simd128")
 
-TARGET_BUILTIN(__builtin_wasm_extend_low_s_i32x4_i64x2, "V2LLiV4i", "nc", "simd128")
-TARGET_BUILTIN(__builtin_wasm_extend_high_s_i32x4_i64x2, "V2LLiV4i", "nc", "simd128")
-TARGET_BUILTIN(__builtin_wasm_extend_low_u_i32x4_i64x2, "V2LLUiV4Ui", "nc", "simd128")
-TARGET_BUILTIN(__builtin_wasm_extend_high_u_i32x4_i64x2, "V2LLUiV4Ui", "nc", "simd128")
-
 TARGET_BUILTIN(__builtin_wasm_trunc_sat_zero_s_f64x2_i32x4, "V4iV2d", "nc", "simd128")
 TARGET_BUILTIN(__builtin_wasm_trunc_sat_zero_u_f64x2_i32x4, "V4UiV2d", "nc", "simd128")
 TARGET_BUILTIN(__builtin_wasm_demote_zero_f64x2_f32x4, "V4fV2d", "nc", "simd128")

diff  --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 7871dfd65d53..860492a281fe 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -17475,31 +17475,6 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
         CGM.getIntrinsic(IntNo, {ConvertType(E->getType()), Low->getType()});
     return Builder.CreateCall(Callee, {Low, High});
   }
-  case WebAssembly::BI__builtin_wasm_extend_low_s_i32x4_i64x2:
-  case WebAssembly::BI__builtin_wasm_extend_high_s_i32x4_i64x2:
-  case WebAssembly::BI__builtin_wasm_extend_low_u_i32x4_i64x2:
-  case WebAssembly::BI__builtin_wasm_extend_high_u_i32x4_i64x2: {
-    Value *Vec = EmitScalarExpr(E->getArg(0));
-    unsigned IntNo;
-    switch (BuiltinID) {
-    case WebAssembly::BI__builtin_wasm_extend_low_s_i32x4_i64x2:
-      IntNo = Intrinsic::wasm_extend_low_signed;
-      break;
-    case WebAssembly::BI__builtin_wasm_extend_high_s_i32x4_i64x2:
-      IntNo = Intrinsic::wasm_extend_high_signed;
-      break;
-    case WebAssembly::BI__builtin_wasm_extend_low_u_i32x4_i64x2:
-      IntNo = Intrinsic::wasm_extend_low_unsigned;
-      break;
-    case WebAssembly::BI__builtin_wasm_extend_high_u_i32x4_i64x2:
-      IntNo = Intrinsic::wasm_extend_high_unsigned;
-      break;
-    default:
-      llvm_unreachable("unexpected builtin ID");
-    }
-    Function *Callee = CGM.getIntrinsic(IntNo);
-    return Builder.CreateCall(Callee, Vec);
-  }
   case WebAssembly::BI__builtin_wasm_trunc_sat_zero_s_f64x2_i32x4:
   case WebAssembly::BI__builtin_wasm_trunc_sat_zero_u_f64x2_i32x4: {
     Value *Vec = EmitScalarExpr(E->getArg(0));

diff  --git a/clang/test/CodeGen/builtins-wasm.c b/clang/test/CodeGen/builtins-wasm.c
index a5c6f4423c3b..1a986f03dc49 100644
--- a/clang/test/CodeGen/builtins-wasm.c
+++ b/clang/test/CodeGen/builtins-wasm.c
@@ -890,30 +890,6 @@ u16x8 narrow_u_i16x8_i32x4(u32x4 low, u32x4 high) {
   // WEBASSEMBLY: ret
 }
 
-i64x2 extend_low_s_i32x4_i64x2(i32x4 x) {
-  return __builtin_wasm_extend_low_s_i32x4_i64x2(x);
-  // WEBASSEMBLY: call <2 x i64> @llvm.wasm.extend.low.signed(<4 x i32> %x)
-  // WEBASSEMBLY: ret
-}
-
-i64x2 extend_high_s_i32x4_i64x2(i32x4 x) {
-  return __builtin_wasm_extend_high_s_i32x4_i64x2(x);
-  // WEBASSEMBLY: call <2 x i64> @llvm.wasm.extend.high.signed(<4 x i32> %x)
-  // WEBASSEMBLY: ret
-}
-
-u64x2 extend_low_u_i32x4_i64x2(u32x4 x) {
-  return __builtin_wasm_extend_low_u_i32x4_i64x2(x);
-  // WEBASSEMBLY: call <2 x i64> @llvm.wasm.extend.low.unsigned(<4 x i32> %x)
-  // WEBASSEMBLY: ret
-}
-
-u64x2 extend_high_u_i32x4_i64x2(u32x4 x) {
-  return __builtin_wasm_extend_high_u_i32x4_i64x2(x);
-  // WEBASSEMBLY: call <2 x i64> @llvm.wasm.extend.high.unsigned(<4 x i32> %x)
-  // WEBASSEMBLY: ret
-}
-
 i32x4 trunc_sat_zero_s_f64x2_i32x4(f64x2 x) {
   return __builtin_wasm_trunc_sat_zero_s_f64x2_i32x4(x);
   // WEBASSEMBLY: call <4 x i32> @llvm.wasm.trunc.sat.zero.signed(<2 x double> %x)

diff  --git a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
index 977647db92ad..4e2d557f1f08 100644
--- a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
+++ b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
@@ -157,17 +157,6 @@ def int_wasm_narrow_unsigned :
             [llvm_anyvector_ty, LLVMMatchType<1>],
             [IntrNoMem, IntrSpeculatable]>;
 
-// TODO: Replace these intrinsics with normal ISel patterns once i32x4 to i64x2
-// extending is merged to the proposal.
-def int_wasm_extend_low_signed :
-  Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem, IntrSpeculatable]>;
-def int_wasm_extend_high_signed :
-  Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem, IntrSpeculatable]>;
-def int_wasm_extend_low_unsigned :
-  Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem, IntrSpeculatable]>;
-def int_wasm_extend_high_unsigned :
-  Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem, IntrSpeculatable]>;
-
 def int_wasm_q15mulr_sat_signed :
   Intrinsic<[llvm_v8i16_ty],
             [llvm_v8i16_ty, llvm_v8i16_ty],

diff  --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index a2f0d1164b6a..acf4f0194758 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -1998,8 +1998,8 @@ performVectorExtendCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
     return SDValue();
   auto Index = IndexNode->getZExtValue();
 
-  // Only v8i8 and v4i16 extracts can be widened, and only if the extracted
-  // subvector is the low or high half of its source.
+  // Only v8i8, v4i16, and v2i32 extracts can be widened, and only if the
+  // extracted subvector is the low or high half of its source.
   EVT ResVT = N->getValueType(0);
   if (ResVT == MVT::v8i16) {
     if (Extract.getValueType() != MVT::v8i8 ||
@@ -2009,6 +2009,10 @@ performVectorExtendCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
     if (Extract.getValueType() != MVT::v4i16 ||
         Source.getValueType() != MVT::v8i16 || (Index != 0 && Index != 4))
       return SDValue();
+  } else if (ResVT == MVT::v2i64) {
+    if (Extract.getValueType() != MVT::v2i32 ||
+        Source.getValueType() != MVT::v4i32 || (Index != 0 && Index != 2))
+      return SDValue();
   } else {
     return SDValue();
   }

diff  --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index 93f7c15d1444..a1150c0f9450 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -1125,17 +1125,9 @@ multiclass SIMDExtend<Vec vec, bits<32> baseInst> {
                         "extend_high_"#vec.split.prefix#"_u", !add(baseInst, 3)>;
 }
 
-defm "" : SIMDExtend<I16x8, 135>;
-defm "" : SIMDExtend<I32x4, 167>;
-
-defm "" : SIMDConvert<I64x2, I32x4, int_wasm_extend_low_signed,
-                      "extend_low_i32x4_s", 199>;
-defm "" : SIMDConvert<I64x2, I32x4, int_wasm_extend_high_signed,
-                      "extend_high_i32x4_s", 200>;
-defm "" : SIMDConvert<I64x2, I32x4, int_wasm_extend_low_unsigned,
-                      "extend_low_i32x4_u", 201>;
-defm "" : SIMDConvert<I64x2, I32x4, int_wasm_extend_high_unsigned,
-                      "extend_high_i32x4_u", 202>;
+defm "" : SIMDExtend<I16x8, 0x87>;
+defm "" : SIMDExtend<I32x4, 0xa7>;
+defm "" : SIMDExtend<I64x2, 0xc7>;
 
 // Narrowing operations
 multiclass SIMDNarrow<Vec vec, bits<32> baseInst> {

diff  --git a/llvm/test/CodeGen/WebAssembly/simd-extending.ll b/llvm/test/CodeGen/WebAssembly/simd-extending.ll
index 3f512cd2678e..9ecee61424e4 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-extending.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-extending.ll
@@ -110,6 +110,58 @@ define <4 x i32> @extend_high_i16x8_u(<8 x i16> %v) {
   ret <4 x i32> %extended
 }
 
+define <2 x i64> @extend_low_i32x4_s(<4 x i32> %v) {
+; CHECK-LABEL: extend_low_i32x4_s:
+; CHECK:         .functype extend_low_i32x4_s (v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    i64x2.extend_low_i32x4_s
+; CHECK-NEXT:    # fallthrough-return
+  %low = shufflevector <4 x i32> %v, <4 x i32> undef,
+           <2 x i32> <i32 0, i32 1>
+  %extended = sext <2 x i32> %low to <2 x i64>
+  ret <2 x i64> %extended
+}
+
+define <2 x i64> @extend_low_i32x4_u(<4 x i32> %v) {
+; CHECK-LABEL: extend_low_i32x4_u:
+; CHECK:         .functype extend_low_i32x4_u (v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    i64x2.extend_low_i32x4_u
+; CHECK-NEXT:    # fallthrough-return
+  %low = shufflevector <4 x i32> %v, <4 x i32> undef,
+           <2 x i32> <i32 0, i32 1>
+  %extended = zext <2 x i32> %low to <2 x i64>
+  ret <2 x i64> %extended
+}
+
+define <2 x i64> @extend_high_i32x4_s(<4 x i32> %v) {
+; CHECK-LABEL: extend_high_i32x4_s:
+; CHECK:         .functype extend_high_i32x4_s (v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    i64x2.extend_high_i32x4_s
+; CHECK-NEXT:    # fallthrough-return
+  %low = shufflevector <4 x i32> %v, <4 x i32> undef,
+           <2 x i32> <i32 2, i32 3>
+  %extended = sext <2 x i32> %low to <2 x i64>
+  ret <2 x i64> %extended
+}
+
+define <2 x i64> @extend_high_i32x4_u(<4 x i32> %v) {
+; CHECK-LABEL: extend_high_i32x4_u:
+; CHECK:         .functype extend_high_i32x4_u (v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    i64x2.extend_high_i32x4_u
+; CHECK-NEXT:    # fallthrough-return
+  %low = shufflevector <4 x i32> %v, <4 x i32> undef,
+           <2 x i32> <i32 2, i32 3>
+  %extended = zext <2 x i32> %low to <2 x i64>
+  ret <2 x i64> %extended
+}
+
 ;; Also test that similar patterns with offsets not corresponding to
 ;; the low or high half are correctly expanded.
 

diff  --git a/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll b/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll
index 5df5ae9a21bd..6e8e5a2fed71 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll
@@ -553,46 +553,6 @@ define <4 x i32> @trunc_sat_zero_unsigned_v4i32(<2 x double> %a) {
 ; ==============================================================================
 ; 2 x i64
 ; ==============================================================================
-; CHECK-LABEL: extend_low_s_v2i64:
-; CHECK-NEXT: .functype extend_low_s_v2i64 (v128) -> (v128){{$}}
-; CHECK-NEXT: i64x2.extend_low_i32x4_s $push[[R:[0-9]+]]=, $0{{$}}
-; CHECK-NEXT: return $pop[[R]]{{$}}
-declare <2 x i64> @llvm.wasm.extend.low.signed(<4 x i32>)
-define <2 x i64> @extend_low_s_v2i64(<4 x i32> %x) {
-  %a = call <2 x i64> @llvm.wasm.extend.low.signed(<4 x i32> %x)
-  ret <2 x i64> %a
-}
-
-; CHECK-LABEL: extend_high_s_v2i64:
-; CHECK-NEXT: .functype extend_high_s_v2i64 (v128) -> (v128){{$}}
-; CHECK-NEXT: i64x2.extend_high_i32x4_s $push[[R:[0-9]+]]=, $0{{$}}
-; CHECK-NEXT: return $pop[[R]]{{$}}
-declare <2 x i64> @llvm.wasm.extend.high.signed(<4 x i32>)
-define <2 x i64> @extend_high_s_v2i64(<4 x i32> %x) {
-  %a = call <2 x i64> @llvm.wasm.extend.high.signed(<4 x i32> %x)
-  ret <2 x i64> %a
-}
-
-; CHECK-LABEL: extend_low_u_v2i64:
-; CHECK-NEXT: .functype extend_low_u_v2i64 (v128) -> (v128){{$}}
-; CHECK-NEXT: i64x2.extend_low_i32x4_u $push[[R:[0-9]+]]=, $0{{$}}
-; CHECK-NEXT: return $pop[[R]]{{$}}
-declare <2 x i64> @llvm.wasm.extend.low.unsigned(<4 x i32>)
-define <2 x i64> @extend_low_u_v2i64(<4 x i32> %x) {
-  %a = call <2 x i64> @llvm.wasm.extend.low.unsigned(<4 x i32> %x)
-  ret <2 x i64> %a
-}
-
-; CHECK-LABEL: extend_high_u_v2i64:
-; CHECK-NEXT: .functype extend_high_u_v2i64 (v128) -> (v128){{$}}
-; CHECK-NEXT: i64x2.extend_high_i32x4_u $push[[R:[0-9]+]]=, $0{{$}}
-; CHECK-NEXT: return $pop[[R]]{{$}}
-declare <2 x i64> @llvm.wasm.extend.high.unsigned(<4 x i32>)
-define <2 x i64> @extend_high_u_v2i64(<4 x i32> %x) {
-  %a = call <2 x i64> @llvm.wasm.extend.high.unsigned(<4 x i32> %x)
-  ret <2 x i64> %a
-}
-
 ; CHECK-LABEL: extmul_low_s_v2i64:
 ; CHECK-NEXT: .functype extmul_low_s_v2i64 (v128, v128) -> (v128){{$}}
 ; CHECK-NEXT: i64x2.extmul_low_i32x4_s $push[[R:[0-9]+]]=, $0, $1{{$}}