[llvm] [WebAssembly] Fold extended vector shifts by constant to extmul (PR #184007)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Mar 1 06:14:28 PST 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-webassembly
Author: hanbeom (ParkHanbum)
<details>
<summary>Changes</summary>
Previously, vector shifts of extended operands by a constant vector were
lowered into independent extend and shift nodes.
Example: `shl (WebAssemblyISD::EXTEND_LOW_S t1), <12, 0, 12, 0>`
WebAssembly SIMD lacks extended shifts but supports extended
multiplications. Converting each shift constant C into a multiplier
(1 << C) and wrapping the resulting constant vector in a matching extend
node normalizes the DAG for extmul selection.
The selector matches the mul(ext, ext) structure into extmul, using
explicit undef padding to fulfill the 128-bit register constraint.
Fixes: https://github.com/llvm/llvm-project/issues/179143
---
Full diff: https://github.com/llvm/llvm-project/pull/184007.diff
2 Files Affected:
- (modified) llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp (+73)
- (modified) llvm/test/CodeGen/WebAssembly/wide-simd-mul.ll (+44)
``````````diff
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index faea931aeccdc..39e33cc76a5ba 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -2797,9 +2797,82 @@ static SDValue unrollVectorShift(SDValue Op, SelectionDAG &DAG) {
return DAG.getBuildVector(Op.getValueType(), DL, UnrolledOps);
}
+/// Convert a vector shift of an extended value into a multiplication of
+/// extended values. By converting the shift amount to a multiplier (1 << C)
+/// and wrapping it in a matching extend node, we enable the instruction
+/// selector to match the pattern to WebAssembly extended multiplication
+/// instructions (e.g., i32x4.extmul_low_i16x8_s). Inactive lanes in the
+/// multiplier vector are populated with undefs.
+///
+/// Example transformation:
+/// Before:
+/// t1: v8i16 = ...
+/// t2: v4i32 = WebAssemblyISD::EXTEND_LOW_S t1
+/// t3: v4i32 = BUILD_VECTOR Constant:i32<12>, Constant:i32<0>, ...
+/// t4: v4i32 = shl t2, t3
+///
+/// After:
+/// t1: v8i16 = ...
+/// t2: v4i32 = WebAssemblyISD::EXTEND_LOW_S t1
+/// t3: v8i16 = BUILD_VECTOR Constant:i16<4096>, Constant:i16<1>, ..., undef, undef
+/// t4: v4i32 = WebAssemblyISD::EXTEND_LOW_S t3
+/// t5: v4i32 = mul t2, t4
+static SDValue foldShiftByConstantToExtMul(SDValue Op, SelectionDAG &DAG) {
+ if (Op.getOpcode() != ISD::SHL || !Op.getValueType().isVector())
+ return SDValue();
+
+ SDValue RHS = Op.getOperand(1);
+ if (RHS.getOpcode() != ISD::BUILD_VECTOR)
+ return SDValue();
+
+ for (SDValue LaneOp : RHS->ops()) {
+ if (!isa<ConstantSDNode>(LaneOp))
+ return SDValue();
+ }
+
+ SDLoc DL(Op);
+ SDValue LHS = Op.getOperand(0);
+ unsigned ExtOpc = LHS.getOpcode();
+ bool IsLow = false;
+ if (ExtOpc == WebAssemblyISD::EXTEND_LOW_S ||
+ ExtOpc == WebAssemblyISD::EXTEND_HIGH_S) {
+ IsLow = (ExtOpc == WebAssemblyISD::EXTEND_LOW_S);
+ } else if (ExtOpc == WebAssemblyISD::EXTEND_LOW_U ||
+ ExtOpc == WebAssemblyISD::EXTEND_HIGH_U) {
+ IsLow = (ExtOpc == WebAssemblyISD::EXTEND_LOW_U);
+ } else {
+ return SDValue();
+ }
+
+ SDValue SrcVec = LHS.getOperand(0);
+ EVT SrcVecTy = SrcVec.getValueType();
+ unsigned SrcVecEltNum = SrcVecTy.getVectorNumElements();
+ unsigned ConstVecEltNum = SrcVecEltNum / 2;
+ SmallVector<SDValue, 16> MulConsts(SrcVecEltNum,
+ DAG.getUNDEF(SrcVecTy.getScalarType()));
+ unsigned StartIdx = IsLow ? 0 : ConstVecEltNum;
+ for (unsigned I = 0; I < ConstVecEltNum; ++I) {
+ auto *C = cast<ConstantSDNode>(RHS.getOperand(I));
+ uint64_t ShiftAmt = C->getZExtValue();
+ if (ShiftAmt >= SrcVecTy.getScalarSizeInBits())
+ return SDValue();
+
+ uint64_t MulAmt = 1ULL << ShiftAmt;
+ MulConsts[StartIdx + I] =
+ DAG.getConstant(MulAmt, DL, SrcVecTy.getScalarType());
+ }
+
+ SDValue ConstVec = DAG.getBuildVector(SrcVecTy, DL, MulConsts);
+ SDValue ExtConstVec = DAG.getNode(ExtOpc, DL, Op.getValueType(), ConstVec);
+
+ return DAG.getNode(ISD::MUL, DL, Op.getValueType(), LHS, ExtConstVec);
+}
+
SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
+ if (SDValue FoldedExtMul = foldShiftByConstantToExtMul(Op, DAG))
+ return FoldedExtMul;
// Only manually lower vector shifts
assert(Op.getSimpleValueType().isVector());
diff --git a/llvm/test/CodeGen/WebAssembly/wide-simd-mul.ll b/llvm/test/CodeGen/WebAssembly/wide-simd-mul.ll
index 94aa197bfd564..28722163d367c 100644
--- a/llvm/test/CodeGen/WebAssembly/wide-simd-mul.ll
+++ b/llvm/test/CodeGen/WebAssembly/wide-simd-mul.ll
@@ -195,3 +195,47 @@ define <8 x i32> @zext_sext_mul_v8i16(<8 x i16> %a, <8 x i16> %b) {
%mul = mul <8 x i32> %wide.a, %wide.b
ret <8 x i32> %mul
}
+
+define <4 x i32> @sext_mul_v8i16_with_symmetric_constant_vector(<8 x i16> %v) {
+; CHECK-LABEL: sext_mul_v8i16_with_symmetric_constant_vector:
+; CHECK: .functype sext_mul_v8i16_with_symmetric_constant_vector (v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: v128.const $push1=, 4096, 1, 4096, 1, 0, 0, 0, 0
+; CHECK-NEXT: i32x4.extmul_low_i16x8_s $push8=, $0, $pop1
+; CHECK-NEXT: local.tee $push7=, $1=, $pop8
+; CHECK-NEXT: v128.const $push0=, 0, 0, 0, 0, 4096, 1, 4096, 1
+; CHECK-NEXT: i32x4.extmul_high_i16x8_s $push6=, $0, $pop0
+; CHECK-NEXT: local.tee $push5=, $0=, $pop6
+; CHECK-NEXT: i8x16.shuffle $push3=, $pop7, $pop5, 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
+; CHECK-NEXT: i8x16.shuffle $push2=, $1, $0, 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31
+; CHECK-NEXT: i32x4.add $push4=, $pop3, $pop2
+; CHECK-NEXT: return $pop4
+ %sext = sext <8 x i16> %v to <8 x i32>
+ %1 = mul nsw <8 x i32> %sext, <i32 4096, i32 1, i32 4096, i32 1, i32 4096, i32 1, i32 4096, i32 1>
+ %2 = shufflevector <8 x i32> %1, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %3 = shufflevector <8 x i32> %1, <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ %4 = add <4 x i32> %2, %3
+ ret <4 x i32> %4
+}
+
+define <4 x i32> @sext_mul_v8i16_with_constant(<8 x i16> %v) {
+; CHECK-LABEL: sext_mul_v8i16_with_constant:
+; CHECK: .functype sext_mul_v8i16_with_constant (v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: v128.const $push1=, 4096, 1, 4096, 1, 0, 0, 0, 0
+; CHECK-NEXT: i32x4.extmul_low_i16x8_s $push8=, $0, $pop1
+; CHECK-NEXT: local.tee $push7=, $1=, $pop8
+; CHECK-NEXT: v128.const $push0=, 0, 0, 0, 0, 4096, 1, 4096, 1
+; CHECK-NEXT: i32x4.extmul_high_i16x8_s $push6=, $0, $pop0
+; CHECK-NEXT: local.tee $push5=, $0=, $pop6
+; CHECK-NEXT: i8x16.shuffle $push3=, $pop7, $pop5, 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
+; CHECK-NEXT: i8x16.shuffle $push2=, $1, $0, 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31
+; CHECK-NEXT: i32x4.add $push4=, $pop3, $pop2
+; CHECK-NEXT: return $pop4
+ %sext = sext <8 x i16> %v to <8 x i32>
+ %1 = mul nsw <8 x i32> %sext, <i32 4096, i32 1, i32 4096, i32 1, i32 4096, i32 1, i32 4096, i32 1>
+ %2 = shufflevector <8 x i32> %1, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %3 = shufflevector <8 x i32> %1, <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ %4 = add <4 x i32> %2, %3
+ ret <4 x i32> %4
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/184007
More information about the llvm-commits
mailing list