[llvm] 948a847 - [WebAssembly] Recognise EXTEND_HIGH (#123325)

via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 17 01:04:33 PST 2025


Author: Sam Parker
Date: 2025-02-17T09:04:29Z
New Revision: 948a8477c6a966ee8509400d2857706e933f4149

URL: https://github.com/llvm/llvm-project/commit/948a8477c6a966ee8509400d2857706e933f4149
DIFF: https://github.com/llvm/llvm-project/commit/948a8477c6a966ee8509400d2857706e933f4149.diff

LOG: [WebAssembly] Recognise EXTEND_HIGH (#123325)

When lowering EXTEND_VECTOR_INREG, check whether the operand is a
shuffle that is moving the top half of a vector into the lower half. If
so, we can EXTEND_HIGH the input to the shuffle instead.

Added: 
    llvm/test/CodeGen/WebAssembly/extend-shuffles.ll

Modified: 
    llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
    llvm/test/CodeGen/WebAssembly/int-mac-reduction-loops.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index fedad25c775e2..4fc79b3d6e3f8 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -2266,6 +2266,32 @@ WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
                      Op.getOperand(1));
 }
 
+static SDValue GetExtendHigh(SDValue Op, unsigned UserOpc, EVT VT, // Try to turn an extend_low of a high-half shuffle into an extend_high of the shuffle's first input.
+                             SelectionDAG &DAG) { // Returns an empty SDValue when Op does not match the pattern.
+  if (Op.getOpcode() != ISD::VECTOR_SHUFFLE) // Only a shuffle operand can match.
+    return SDValue();
+
+  assert((UserOpc == WebAssemblyISD::EXTEND_LOW_U ||
+          UserOpc == WebAssemblyISD::EXTEND_LOW_S) &&
+         "expected extend_low"); // UserOpc decides between the signed and unsigned replacement below.
+  auto *Shuffle = cast<ShuffleVectorSDNode>(Op.getNode());
+
+  ArrayRef<int> Mask = Shuffle->getMask();
+  // Look for a shuffle which moves from the high half to the low half.
+  size_t FirstIdx = Mask.size() / 2; // Index of the first element in the high half.
+  for (size_t i = 0; i < Mask.size() / 2; ++i) { // Only the low half of the mask is inspected.
+    if (Mask[i] != static_cast<int>(FirstIdx + i)) { // Lane i must select element FirstIdx + i exactly; any other value rejects.
+      return SDValue();
+    }
+  }
+
+  SDLoc DL(Op);
+  unsigned Opc = UserOpc == WebAssemblyISD::EXTEND_LOW_S // Keep the signedness of the requested extend.
+                     ? WebAssemblyISD::EXTEND_HIGH_S
+                     : WebAssemblyISD::EXTEND_HIGH_U;
+  return DAG.getNode(Opc, DL, VT, Shuffle->getOperand(0)); // Extend the shuffle's first input directly, bypassing the shuffle.
+}
+
 SDValue
 WebAssemblyTargetLowering::LowerEXTEND_VECTOR_INREG(SDValue Op,
                                                     SelectionDAG &DAG) const {
@@ -2295,6 +2321,12 @@ WebAssemblyTargetLowering::LowerEXTEND_VECTOR_INREG(SDValue Op,
     break;
   }
 
+  if (Scale == 2) {
+    // See if we can use EXTEND_HIGH.
+    if (auto ExtendHigh = GetExtendHigh(Op.getOperand(0), Ext, VT, DAG))
+      return ExtendHigh;
+  }
+
   SDValue Ret = Src;
   while (Scale != 1) {
     Ret = DAG.getNode(Ext, DL,

diff  --git a/llvm/test/CodeGen/WebAssembly/extend-shuffles.ll b/llvm/test/CodeGen/WebAssembly/extend-shuffles.ll
new file mode 100644
index 0000000000000..7736e78271e55
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/extend-shuffles.ll
@@ -0,0 +1,227 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+
+; RUN: llc < %s -mtriple=wasm32 -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s --check-prefix=SIMD128
+
+target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-i128:128-n32:64-S128-ni:1:10:20"
+target triple = "wasm32"
+
+define <4 x i32> @sext_high_v4i8(<8 x i8> %in) { ; i8 -> i32 sext of lanes 4-7: checks expect a shuffle plus two extend_low steps, not extend_high.
+; SIMD128-LABEL: sext_high_v4i8:
+; SIMD128:         .functype sext_high_v4i8 (v128) -> (v128)
+; SIMD128-NEXT:  # %bb.0:
+; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; SIMD128-NEXT:    i16x8.extend_low_i8x16_s $push1=, $pop0
+; SIMD128-NEXT:    i32x4.extend_low_i16x8_s $push2=, $pop1
+; SIMD128-NEXT:    return $pop2
+ %shuffle = shufflevector <8 x i8> %in, <8 x i8> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %res = sext <4 x i8> %shuffle to <4 x i32>
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @zext_high_v4i8(<8 x i8> %in) { ; i8 -> i32 zext of lanes 4-7: checks expect a shuffle plus two extend_low steps, not extend_high.
+; SIMD128-LABEL: zext_high_v4i8:
+; SIMD128:         .functype zext_high_v4i8 (v128) -> (v128)
+; SIMD128-NEXT:  # %bb.0:
+; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; SIMD128-NEXT:    i16x8.extend_low_i8x16_u $push1=, $pop0
+; SIMD128-NEXT:    i32x4.extend_low_i16x8_u $push2=, $pop1
+; SIMD128-NEXT:    return $pop2
+ %shuffle = shufflevector <8 x i8> %in, <8 x i8> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %res = zext <4 x i8> %shuffle to <4 x i32>
+ ret <4 x i32> %res
+}
+
+define <8 x i16> @sext_high_v8i8(<16 x i8> %in) { ; i8 -> i16 sext of lanes 8-15: checks expect a single signed extend_high and no shuffle.
+; SIMD128-LABEL: sext_high_v8i8:
+; SIMD128:         .functype sext_high_v8i8 (v128) -> (v128)
+; SIMD128-NEXT:  # %bb.0:
+; SIMD128-NEXT:    i16x8.extend_high_i8x16_s $push0=, $0
+; SIMD128-NEXT:    return $pop0
+ %shuffle = shufflevector <16 x i8> %in, <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %res = sext <8 x i8> %shuffle to <8 x i16>
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @zext_high_v8i8(<16 x i8> %in) { ; i8 -> i16 zext of lanes 8-15: checks expect a single unsigned extend_high and no shuffle.
+; SIMD128-LABEL: zext_high_v8i8:
+; SIMD128:         .functype zext_high_v8i8 (v128) -> (v128)
+; SIMD128-NEXT:  # %bb.0:
+; SIMD128-NEXT:    i16x8.extend_high_i8x16_u $push0=, $0
+; SIMD128-NEXT:    return $pop0
+ %shuffle = shufflevector <16 x i8> %in, <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %res = zext <8 x i8> %shuffle to <8 x i16>
+ ret <8 x i16> %res
+}
+
+define <2 x i32> @sext_high_v2i16(<4 x i16> %in) { ; i16 -> i32 sext of lanes 2-3 of a <4 x i16>: checks expect a shuffle plus extend_low, not extend_high.
+; SIMD128-LABEL: sext_high_v2i16:
+; SIMD128:         .functype sext_high_v2i16 (v128) -> (v128)
+; SIMD128-NEXT:  # %bb.0:
+; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 4, 5, 6, 7, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
+; SIMD128-NEXT:    i32x4.extend_low_i16x8_s $push1=, $pop0
+; SIMD128-NEXT:    return $pop1
+ %shuffle = shufflevector <4 x i16> %in, <4 x i16> poison, <2 x i32> <i32 2, i32 3>
+ %res = sext <2 x i16> %shuffle to <2 x i32>
+ ret <2 x i32> %res
+}
+
+define <2 x i32> @zext_high_v2i16(<4 x i16> %in) { ; i16 -> i32 zext of lanes 2-3 of a <4 x i16>: checks expect a shuffle plus extend_low, not extend_high.
+; SIMD128-LABEL: zext_high_v2i16:
+; SIMD128:         .functype zext_high_v2i16 (v128) -> (v128)
+; SIMD128-NEXT:  # %bb.0:
+; SIMD128-NEXT:    i8x16.shuffle $push0=, $0, $0, 4, 5, 6, 7, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
+; SIMD128-NEXT:    i32x4.extend_low_i16x8_u $push1=, $pop0
+; SIMD128-NEXT:    return $pop1
+ %shuffle = shufflevector <4 x i16> %in, <4 x i16> poison, <2 x i32> <i32 2, i32 3>
+ %res = zext <2 x i16> %shuffle to <2 x i32>
+ ret <2 x i32> %res
+}
+
+define <4 x i32> @sext_high_v4i16(<8 x i16> %in) { ; i16 -> i32 sext of lanes 4-7: checks expect a single signed extend_high and no shuffle.
+; SIMD128-LABEL: sext_high_v4i16:
+; SIMD128:         .functype sext_high_v4i16 (v128) -> (v128)
+; SIMD128-NEXT:  # %bb.0:
+; SIMD128-NEXT:    i32x4.extend_high_i16x8_s $push0=, $0
+; SIMD128-NEXT:    return $pop0
+ %shuffle = shufflevector <8 x i16> %in, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %res = sext <4 x i16> %shuffle to <4 x i32>
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @zext_high_v4i16(<8 x i16> %in) { ; i16 -> i32 zext of lanes 4-7: checks expect a single unsigned extend_high and no shuffle.
+; SIMD128-LABEL: zext_high_v4i16:
+; SIMD128:         .functype zext_high_v4i16 (v128) -> (v128)
+; SIMD128-NEXT:  # %bb.0:
+; SIMD128-NEXT:    i32x4.extend_high_i16x8_u $push0=, $0
+; SIMD128-NEXT:    return $pop0
+ %shuffle = shufflevector <8 x i16> %in, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %res = zext <4 x i16> %shuffle to <4 x i32>
+ ret <4 x i32> %res
+}
+
+define <2 x i64> @sext_high_v2i32(<4 x i32> %in) { ; i32 -> i64 sext of lanes 2-3: checks expect a single signed extend_high and no shuffle.
+; SIMD128-LABEL: sext_high_v2i32:
+; SIMD128:         .functype sext_high_v2i32 (v128) -> (v128)
+; SIMD128-NEXT:  # %bb.0:
+; SIMD128-NEXT:    i64x2.extend_high_i32x4_s $push0=, $0
+; SIMD128-NEXT:    return $pop0
+ %shuffle = shufflevector <4 x i32> %in, <4 x i32> poison, <2 x i32> <i32 2, i32 3>
+ %res = sext <2 x i32> %shuffle to <2 x i64>
+ ret <2 x i64> %res
+}
+
+define <2 x i64> @zext_high_v2i32(<4 x i32> %in) { ; i32 -> i64 zext of lanes 2-3: checks expect a single unsigned extend_high and no shuffle.
+; SIMD128-LABEL: zext_high_v2i32:
+; SIMD128:         .functype zext_high_v2i32 (v128) -> (v128)
+; SIMD128-NEXT:  # %bb.0:
+; SIMD128-NEXT:    i64x2.extend_high_i32x4_u $push0=, $0
+; SIMD128-NEXT:    return $pop0
+ %shuffle = shufflevector <4 x i32> %in, <4 x i32> poison, <2 x i32> <i32 2, i32 3>
+ %res = zext <2 x i32> %shuffle to <2 x i64>
+ ret <2 x i64> %res
+}
+
+define <4 x i32> @sext_low_v4i8(<8 x i8> %in) { ; i8 -> i32 sext of lanes 0-3: checks expect two signed extend_low steps and no shuffle.
+; SIMD128-LABEL: sext_low_v4i8:
+; SIMD128:         .functype sext_low_v4i8 (v128) -> (v128)
+; SIMD128-NEXT:  # %bb.0:
+; SIMD128-NEXT:    i16x8.extend_low_i8x16_s $push0=, $0
+; SIMD128-NEXT:    i32x4.extend_low_i16x8_s $push1=, $pop0
+; SIMD128-NEXT:    return $pop1
+ %shuffle = shufflevector <8 x i8> %in, <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %res = sext <4 x i8> %shuffle to <4 x i32>
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @zext_low_v4i8(<8 x i8> %in) { ; i8 -> i32 zext of lanes 0-3: checks expect two unsigned extend_low steps and no shuffle.
+; SIMD128-LABEL: zext_low_v4i8:
+; SIMD128:         .functype zext_low_v4i8 (v128) -> (v128)
+; SIMD128-NEXT:  # %bb.0:
+; SIMD128-NEXT:    i16x8.extend_low_i8x16_u $push0=, $0
+; SIMD128-NEXT:    i32x4.extend_low_i16x8_u $push1=, $pop0
+; SIMD128-NEXT:    return $pop1
+ %shuffle = shufflevector <8 x i8> %in, <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %res = zext <4 x i8> %shuffle to <4 x i32>
+ ret <4 x i32> %res
+}
+
+define <8 x i16> @sext_low_v8i8(<16 x i8> %in) { ; i8 -> i16 sext of lanes 0-7: checks expect a single signed extend_low and no shuffle.
+; SIMD128-LABEL: sext_low_v8i8:
+; SIMD128:         .functype sext_low_v8i8 (v128) -> (v128)
+; SIMD128-NEXT:  # %bb.0:
+; SIMD128-NEXT:    i16x8.extend_low_i8x16_s $push0=, $0
+; SIMD128-NEXT:    return $pop0
+ %shuffle = shufflevector <16 x i8> %in, <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %res = sext <8 x i8> %shuffle to <8 x i16>
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @zext_low_v8i8(<16 x i8> %in) { ; i8 -> i16 zext of lanes 0-7: checks expect a single unsigned extend_low and no shuffle.
+; SIMD128-LABEL: zext_low_v8i8:
+; SIMD128:         .functype zext_low_v8i8 (v128) -> (v128)
+; SIMD128-NEXT:  # %bb.0:
+; SIMD128-NEXT:    i16x8.extend_low_i8x16_u $push0=, $0
+; SIMD128-NEXT:    return $pop0
+ %shuffle = shufflevector <16 x i8> %in, <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %res = zext <8 x i8> %shuffle to <8 x i16>
+ ret <8 x i16> %res
+}
+
+define <4 x i32> @sext_low_v4i16(<8 x i16> %in) { ; i16 -> i32 sext of lanes 0-3: checks expect a single signed extend_low and no shuffle.
+; SIMD128-LABEL: sext_low_v4i16:
+; SIMD128:         .functype sext_low_v4i16 (v128) -> (v128)
+; SIMD128-NEXT:  # %bb.0:
+; SIMD128-NEXT:    i32x4.extend_low_i16x8_s $push0=, $0
+; SIMD128-NEXT:    return $pop0
+ %shuffle = shufflevector <8 x i16> %in, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %res = sext <4 x i16> %shuffle to <4 x i32>
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @zext_low_v4i16(<8 x i16> %in) { ; i16 -> i32 zext of lanes 0-3: checks expect a single unsigned extend_low and no shuffle.
+; SIMD128-LABEL: zext_low_v4i16:
+; SIMD128:         .functype zext_low_v4i16 (v128) -> (v128)
+; SIMD128-NEXT:  # %bb.0:
+; SIMD128-NEXT:    i32x4.extend_low_i16x8_u $push0=, $0
+; SIMD128-NEXT:    return $pop0
+ %shuffle = shufflevector <8 x i16> %in, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %res = zext <4 x i16> %shuffle to <4 x i32>
+ ret <4 x i32> %res
+}
+
+define <2 x i64> @sext_low_v2i32(<4 x i32> %in) { ; i32 -> i64 sext of lanes 0-1: checks expect a single signed extend_low and no shuffle.
+; SIMD128-LABEL: sext_low_v2i32:
+; SIMD128:         .functype sext_low_v2i32 (v128) -> (v128)
+; SIMD128-NEXT:  # %bb.0:
+; SIMD128-NEXT:    i64x2.extend_low_i32x4_s $push0=, $0
+; SIMD128-NEXT:    return $pop0
+ %shuffle = shufflevector <4 x i32> %in, <4 x i32> poison, <2 x i32> <i32 0, i32 1>
+ %res = sext <2 x i32> %shuffle to <2 x i64>
+ ret <2 x i64> %res
+}
+
+define <2 x i64> @zext_low_v2i32(<4 x i32> %in) { ; i32 -> i64 zext of lanes 0-1: checks expect a single unsigned extend_low and no shuffle.
+; SIMD128-LABEL: zext_low_v2i32:
+; SIMD128:         .functype zext_low_v2i32 (v128) -> (v128)
+; SIMD128-NEXT:  # %bb.0:
+; SIMD128-NEXT:    i64x2.extend_low_i32x4_u $push0=, $0
+; SIMD128-NEXT:    return $pop0
+ %shuffle = shufflevector <4 x i32> %in, <4 x i32> poison, <2 x i32> <i32 0, i32 1>
+ %res = zext <2 x i32> %shuffle to <2 x i64>
+ ret <2 x i64> %res
+}
+
+define <2 x i64> @multi_use_ext_v2i32(<4 x i32> %in) { ; One high-half shuffle feeds both a zext and a sext: checks expect an unsigned and a signed extend_high and no shuffle.
+; SIMD128-LABEL: multi_use_ext_v2i32:
+; SIMD128:         .functype multi_use_ext_v2i32 (v128) -> (v128)
+; SIMD128-NEXT:  # %bb.0:
+; SIMD128-NEXT:    i64x2.extend_high_i32x4_u $push1=, $0
+; SIMD128-NEXT:    i64x2.extend_high_i32x4_s $push0=, $0
+; SIMD128-NEXT:    i64x2.add $push2=, $pop1, $pop0
+; SIMD128-NEXT:    return $pop2
+ %shuffle = shufflevector <4 x i32> %in, <4 x i32> poison, <2 x i32> <i32 2, i32 3>
+ %zext = zext <2 x i32> %shuffle to <2 x i64>
+ %sext = sext <2 x i32> %shuffle to <2 x i64>
+ %res = add <2 x i64> %zext, %sext
+ ret <2 x i64> %res
+}

diff  --git a/llvm/test/CodeGen/WebAssembly/int-mac-reduction-loops.ll b/llvm/test/CodeGen/WebAssembly/int-mac-reduction-loops.ll
index c9af8e2268f1c..0184e22a3b40d 100644
--- a/llvm/test/CodeGen/WebAssembly/int-mac-reduction-loops.ll
+++ b/llvm/test/CodeGen/WebAssembly/int-mac-reduction-loops.ll
@@ -151,9 +151,8 @@ define hidden i64 @i64_mac_s32(ptr nocapture noundef readonly %a, ptr nocapture
 ; MAX-BANDWIDTH: v128.load
 ; MAX-BANDWIDTH: v128.load
 ; MAX-BANDWIDTH: i32x4.mul
-; MAX-BANDWIDTH: i64x2.extend_low_i32x4_s
+; MAX-BANDWIDTH: i64x2.extend_high_i32x4_s
 ; MAX-BANDWIDTH: i64x2.add
-; MAX-BANDWIDTH: i8x16.shuffle	8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
 ; MAX-BANDWIDTH: i64x2.extend_low_i32x4_s
 ; MAX-BANDWIDTH: i64x2.add
 
@@ -272,11 +271,9 @@ define hidden i32 @i32_mac_u16_s16(ptr nocapture noundef readonly %a, ptr nocapt
 ; CHECK:    i32x4.add
 
 ; MAX-BANDWIDTH: v128.load
-; MAX-BANDWIDTH: i8x16.shuffle	8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1
-; MAX-BANDWIDTH: i32x4.extend_low_i16x8_s
+; MAX-BANDWIDTH: i32x4.extend_high_i16x8_s
 ; MAX-BANDWIDTH: v128.load
-; MAX-BANDWIDTH: i8x16.shuffle	8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1
-; MAX-BANDWIDTH: i32x4.extend_low_i16x8_u
+; MAX-BANDWIDTH: i32x4.extend_high_i16x8_u
 ; MAX-BANDWIDTH: i32x4.mul
 ; MAX-BANDWIDTH: i32x4.extend_low_i16x8_s
 ; MAX-BANDWIDTH: i32x4.extend_low_i16x8_u
@@ -377,9 +374,8 @@ define hidden i64 @i64_mac_u32(ptr nocapture noundef readonly %a, ptr nocapture
 ; MAX-BANDWIDTH: v128.load
 ; MAX-BANDWIDTH: v128.load
 ; MAX-BANDWIDTH: i32x4.mul
-; MAX-BANDWIDTH: i64x2.extend_low_i32x4_u
+; MAX-BANDWIDTH: i64x2.extend_high_i32x4_u
 ; MAX-BANDWIDTH: i64x2.add
-; MAX-BANDWIDTH: i8x16.shuffle	8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
 ; MAX-BANDWIDTH: i64x2.extend_low_i32x4_u
 ; MAX-BANDWIDTH: i64x2.add
 


        


More information about the llvm-commits mailing list