[llvm] 693d767 - [WebAssembly] More codegen for f64x2.convert_low_i32x4_{s,u}

Tue Apr 20 12:37:23 PDT 2021

Author: Thomas Lively
Date: 2021-04-20T12:37:13-07:00
New Revision: 693d767c60933c41abb8663e8b347c2d91240645

URL: https://github.com/llvm/llvm-project/commit/693d767c60933c41abb8663e8b347c2d91240645
DIFF: https://github.com/llvm/llvm-project/commit/693d767c60933c41abb8663e8b347c2d91240645.diff

LOG: [WebAssembly] More codegen for f64x2.convert_low_i32x4_{s,u}

af7925b4dd65 added a custom DAG combine for recognizing int-to-fp conversions
of extract_subvectors that could be lowered to f64x2.convert_low_i32x4_{s,u}
instructions. This commit extends the combine to recognize the equivalent
extract_subvectors of int-to-fp conversions as well.

Differential Revision: https://reviews.llvm.org/D100790

Added: 
    

Modified: 
    llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
    llvm/test/CodeGen/WebAssembly/simd-conversions.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index f4e48883db5d..7054ed9d48c5 100644

--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -135,9 +135,10 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
     setTargetDAGCombine(ISD::SIGN_EXTEND);
     setTargetDAGCombine(ISD::ZERO_EXTEND);
 
-    // Combine {s,u}int_to_fp of extract_vectors into conversion ops
+    // Combine int_to_fp of extract_subvectors and vice versa into conversion ops
     setTargetDAGCombine(ISD::SINT_TO_FP);
     setTargetDAGCombine(ISD::UINT_TO_FP);
+    setTargetDAGCombine(ISD::EXTRACT_SUBVECTOR);
 
     // Combine concat of {s,u}int_to_fp_sat to i32x4.trunc_sat_f64x2_zero_{s,u}
     setTargetDAGCombine(ISD::CONCAT_VECTORS);
@@ -2062,36 +2063,65 @@ static SDValue
 performVectorConvertLowCombine(SDNode *N,
                                TargetLowering::DAGCombinerInfo &DCI) {
   auto &DAG = DCI.DAG;
-  assert(N->getOpcode() == ISD::SINT_TO_FP ||
-         N->getOpcode() == ISD::UINT_TO_FP);
 
-  // Combine ({s,u}int_to_fp (extract_subvector ... 0)) to an
-  // f64x2.convert_low_i32x4_{s,u} SDNode.
-  auto Extract = N->getOperand(0);
-  if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR)
-    return SDValue();
-  auto Source = Extract.getOperand(0);
-  if (Source.getValueType() != MVT::v4i32)
-    return SDValue();
-  auto *IndexNode = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
-  if (IndexNode == nullptr)
-    return SDValue();
-  auto Index = IndexNode->getZExtValue();
-
-  // The types must be correct.
   EVT ResVT = N->getValueType(0);
-  if (ResVT != MVT::v2f64 || Extract.getValueType() != MVT::v2i32)
+  if (ResVT != MVT::v2f64)
     return SDValue();
 
-  // The extracted vector must be the low half.
-  if (Index != 0)
-    return SDValue();
+  if (N->getOpcode() == ISD::SINT_TO_FP || N->getOpcode() == ISD::UINT_TO_FP) {
+    // Combine this:
+    //
+    //   (v2f64 ({s,u}int_to_fp
+    //     (v2i32 (extract_subvector (v4i32 $x), 0))))
+    //
+    // into (f64x2.convert_low_i32x4_{s,u} $x).
+    auto Extract = N->getOperand(0);
+    if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR)
+      return SDValue();
+    if (Extract.getValueType() != MVT::v2i32)
+      return SDValue();
+    auto Source = Extract.getOperand(0);
+    if (Source.getValueType() != MVT::v4i32)
+      return SDValue();
+    auto *IndexNode = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
+    if (IndexNode == nullptr || IndexNode->getZExtValue() != 0)
+      return SDValue();
+
+    unsigned Op = N->getOpcode() == ISD::SINT_TO_FP
+                      ? WebAssemblyISD::CONVERT_LOW_S
+                      : WebAssemblyISD::CONVERT_LOW_U;
+
+    return DAG.getNode(Op, SDLoc(N), ResVT, Source);
+
+  } else if (N->getOpcode() == ISD::EXTRACT_SUBVECTOR) {
+    // Combine this:
+    //
+    //   (v2f64 (extract_subvector
+    //     (v4f64 ({s,u}int_to_fp (v4i32 $x))), 0))
+    //
+    // into (f64x2.convert_low_i32x4_{s,u} $x).
+    auto IntToFP = N->getOperand(0);
+    if (IntToFP.getOpcode() != ISD::SINT_TO_FP &&
+        IntToFP.getOpcode() != ISD::UINT_TO_FP)
+      return SDValue();
+    if (IntToFP.getValueType() != MVT::v4f64)
+      return SDValue();
+    auto Source = IntToFP.getOperand(0);
+    if (Source.getValueType() != MVT::v4i32)
+      return SDValue();
+    auto IndexNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
+    if (IndexNode == nullptr || IndexNode->getZExtValue() != 0)
+      return SDValue();
 
-  unsigned Op = N->getOpcode() == ISD::SINT_TO_FP
-                    ? WebAssemblyISD::CONVERT_LOW_S
-                    : WebAssemblyISD::CONVERT_LOW_U;
+    unsigned Op = IntToFP->getOpcode() == ISD::SINT_TO_FP
+                      ? WebAssemblyISD::CONVERT_LOW_S
+                      : WebAssemblyISD::CONVERT_LOW_U;
 
-  return DAG.getNode(Op, SDLoc(N), ResVT, Source);
+    return DAG.getNode(Op, SDLoc(N), ResVT, Source);
+
+  } else {
+    llvm_unreachable("unexpected opcode");
+  }
 }
 
 static SDValue
@@ -2150,6 +2180,7 @@ WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
     return performVectorExtendCombine(N, DCI);
   case ISD::SINT_TO_FP:
   case ISD::UINT_TO_FP:
+  case ISD::EXTRACT_SUBVECTOR:
     return performVectorConvertLowCombine(N, DCI);
   case ISD::CONCAT_VECTORS:
     return performVectorTruncSatLowCombine(N, DCI);

diff  --git a/llvm/test/CodeGen/WebAssembly/simd-conversions.ll b/llvm/test/CodeGen/WebAssembly/simd-conversions.ll
index 431d55922040..94832a42d18e 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-conversions.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-conversions.ll
@@ -103,3 +103,26 @@ define <2 x double> @convert_low_u_v2f64(<4 x i32> %x) {
   %a = uitofp <2 x i32> %v to <2 x double>
   ret <2 x double> %a
 }
+
+
+; CHECK-LABEL: convert_low_s_v2f64_2:
+; NO-SIMD128-NOT: f64x2
+; SIMD128-NEXT: .functype convert_low_s_v2f64_2 (v128) -> (v128){{$}}
+; SIMD128-NEXT: f64x2.convert_low_i32x4_s $push[[R:[0-9]+]]=, $0
+; SIMD128-NEXT: return $pop[[R]]
+define <2 x double> @convert_low_s_v2f64_2(<4 x i32> %x) {
+  %v = sitofp <4 x i32> %x to <4 x double>
+  %a = shufflevector <4 x double> %v, <4 x double> undef, <2 x i32> <i32 0, i32 1>
+  ret <2 x double> %a
+}
+
+; CHECK-LABEL: convert_low_u_v2f64_2:
+; NO-SIMD128-NOT: f64x2
+; SIMD128-NEXT: .functype convert_low_u_v2f64_2 (v128) -> (v128){{$}}
+; SIMD128-NEXT: f64x2.convert_low_i32x4_u $push[[R:[0-9]+]]=, $0
+; SIMD128-NEXT: return $pop[[R]]
+define <2 x double> @convert_low_u_v2f64_2(<4 x i32> %x) {
+  %v = uitofp <4 x i32> %x to <4 x double>
+  %a = shufflevector <4 x double> %v, <4 x double> undef, <2 x i32> <i32 0, i32 1>
+  ret <2 x double> %a
+}