[llvm] [WebAssembly] Fold extended vector shifts by constant to extmul (PR #184007)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Mar 1 06:14:28 PST 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-webassembly
Author: hanbeom (ParkHanbum)
<details>
<summary>Changes</summary>
Previously, vector shifts of extended operands by a constant vector were
lowered into independent extend and shift nodes.
Example: `shl (WebAssemblyISD::EXTEND_LOW_S t1), <12, 0, 12, 0>`
WebAssembly SIMD lacks extended shifts but supports extended
multiplications. Converting each shift constant C into a multiplier
(1 << C) and wrapping the resulting constant vector in a matching extend
node normalizes the DAG for extmul selection.
The selector matches the mul(ext, ext) structure into extmul, using
explicit undef padding to fulfill the 128-bit register constraint.
Fixes: https://github.com/llvm/llvm-project/issues/179143
---
Full diff: https://github.com/llvm/llvm-project/pull/184007.diff
2 Files Affected:
- (modified) llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp (+73)
- (modified) llvm/test/CodeGen/WebAssembly/wide-simd-mul.ll (+44)
``````````diff
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index faea931aeccdc..39e33cc76a5ba 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -2797,9 +2797,82 @@ static SDValue unrollVectorShift(SDValue Op, SelectionDAG &DAG) {
return DAG.getBuildVector(Op.getValueType(), DL, UnrolledOps);
}
+/// Convert a vector shift of an extended value into a multiplication of
+/// extended values. By converting the shift amount to a multiplier (1 << C)
+/// and wrapping it in a matching extend node, we enable the instruction
+/// selector to match the pattern to WebAssembly extended multiplication
+/// instructions (e.g., i32x4.extmul_low_i16x8_s). Inactive lanes in the
+/// multiplier vector are populated with undefs.
+///
+/// Example transformation:
+/// Before:
+/// t1: v8i16 = ...
+/// t2: v4i32 = WebAssemblyISD::EXTEND_LOW_S t1
+/// t3: v4i32 = BUILD_VECTOR Constant:i32<12>, Constant:i32<0>, ...
+/// t4: v4i32 = shl t2, t3
+///
+/// After:
+/// t1: v8i16 = ...
+/// t2: v4i32 = WebAssemblyISD::EXTEND_LOW_S t1
+/// t3: v8i16 = BUILD_VECTOR Constant:i16<4096>, Constant:i16<1>, ..., undef, undef
+/// t4: v4i32 = WebAssemblyISD::EXTEND_LOW_S t3
+/// t5: v4i32 = mul t2, t4
+static SDValue foldShiftByConstantToExtMul(SDValue Op, SelectionDAG &DAG) {
+ if (Op.getOpcode() != ISD::SHL || !Op.getValueType().isVector())
+ return SDValue();
+
+ SDValue RHS = Op.getOperand(1);
+ if (RHS.getOpcode() != ISD::BUILD_VECTOR)
+ return SDValue();
+
+ for (SDValue LaneOp : RHS->ops()) {
+ if (!isa<ConstantSDNode>(LaneOp))
+ return SDValue();
+ }
+
+ SDLoc DL(Op);
+ SDValue LHS = Op.getOperand(0);
+ unsigned ExtOpc = LHS.getOpcode();
+ bool IsLow = false;
+ if (ExtOpc == WebAssemblyISD::EXTEND_LOW_S ||
+ ExtOpc == WebAssemblyISD::EXTEND_HIGH_S) {
+ IsLow = (ExtOpc == WebAssemblyISD::EXTEND_LOW_S);
+ } else if (ExtOpc == WebAssemblyISD::EXTEND_LOW_U ||
+ ExtOpc == WebAssemblyISD::EXTEND_HIGH_U) {
+ IsLow = (ExtOpc == WebAssemblyISD::EXTEND_LOW_U);
+ } else {
+ return SDValue();
+ }
+
+ SDValue SrcVec = LHS.getOperand(0);
+ EVT SrcVecTy = SrcVec.getValueType();
+ unsigned SrcVecEltNum = SrcVecTy.getVectorNumElements();
+ unsigned ConstVecEltNum = SrcVecEltNum / 2;
+ SmallVector<SDValue, 16> MulConsts(SrcVecEltNum,
+ DAG.getUNDEF(SrcVecTy.getScalarType()));
+ unsigned StartIdx = IsLow ? 0 : ConstVecEltNum;
+ for (unsigned I = 0; I < ConstVecEltNum; ++I) {
+ auto *C = cast<ConstantSDNode>(RHS.getOperand(I));
+ uint64_t ShiftAmt = C->getZExtValue();
+ if (ShiftAmt >= SrcVecTy.getScalarSizeInBits())
+ return SDValue();
+
+ uint64_t MulAmt = 1ULL << ShiftAmt;
+ MulConsts[StartIdx + I] =
+ DAG.getConstant(MulAmt, DL, SrcVecTy.getScalarType());
+ }
+
+ SDValue ConstVec = DAG.getBuildVector(SrcVecTy, DL, MulConsts);
+ SDValue ExtConstVec = DAG.getNode(ExtOpc, DL, Op.getValueType(), ConstVec);
+
+ return DAG.getNode(ISD::MUL, DL, Op.getValueType(), LHS, ExtConstVec);
+}
+
SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
+ if (SDValue FoldedExtMul = foldShiftByConstantToExtMul(Op, DAG))
+ return FoldedExtMul;
// Only manually lower vector shifts
assert(Op.getSimpleValueType().isVector());
diff --git a/llvm/test/CodeGen/WebAssembly/wide-simd-mul.ll b/llvm/test/CodeGen/WebAssembly/wide-simd-mul.ll
index 94aa197bfd564..28722163d367c 100644
--- a/llvm/test/CodeGen/WebAssembly/wide-simd-mul.ll
+++ b/llvm/test/CodeGen/WebAssembly/wide-simd-mul.ll
@@ -195,3 +195,47 @@ define <8 x i32> @zext_sext_mul_v8i16(<8 x i16> %a, <8 x i16> %b) {
%mul = mul <8 x i32> %wide.a, %wide.b
ret <8 x i32> %mul
}
+
+define <4 x i32> @sext_mul_v8i16_with_symmetric_constant_vector(<8 x i16> %v) {
+; CHECK-LABEL: sext_mul_v8i16_with_symmetric_constant_vector:
+; CHECK: .functype sext_mul_v8i16_with_symmetric_constant_vector (v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: v128.const $push1=, 4096, 1, 4096, 1, 0, 0, 0, 0
+; CHECK-NEXT: i32x4.extmul_low_i16x8_s $push8=, $0, $pop1
+; CHECK-NEXT: local.tee $push7=, $1=, $pop8
+; CHECK-NEXT: v128.const $push0=, 0, 0, 0, 0, 4096, 1, 4096, 1
+; CHECK-NEXT: i32x4.extmul_high_i16x8_s $push6=, $0, $pop0
+; CHECK-NEXT: local.tee $push5=, $0=, $pop6
+; CHECK-NEXT: i8x16.shuffle $push3=, $pop7, $pop5, 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
+; CHECK-NEXT: i8x16.shuffle $push2=, $1, $0, 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31
+; CHECK-NEXT: i32x4.add $push4=, $pop3, $pop2
+; CHECK-NEXT: return $pop4
+ %sext = sext <8 x i16> %v to <8 x i32>
+ %1 = mul nsw <8 x i32> %sext, <i32 4096, i32 1, i32 4096, i32 1, i32 4096, i32 1, i32 4096, i32 1>
+ %2 = shufflevector <8 x i32> %1, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %3 = shufflevector <8 x i32> %1, <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ %4 = add <4 x i32> %2, %3
+ ret <4 x i32> %4
+}
+
+define <4 x i32> @sext_mul_v8i16_with_constant(<8 x i16> %v) {
+; CHECK-LABEL: sext_mul_v8i16_with_constant:
+; CHECK: .functype sext_mul_v8i16_with_constant (v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: v128.const $push1=, 4096, 1, 4096, 1, 0, 0, 0, 0
+; CHECK-NEXT: i32x4.extmul_low_i16x8_s $push8=, $0, $pop1
+; CHECK-NEXT: local.tee $push7=, $1=, $pop8
+; CHECK-NEXT: v128.const $push0=, 0, 0, 0, 0, 4096, 1, 4096, 1
+; CHECK-NEXT: i32x4.extmul_high_i16x8_s $push6=, $0, $pop0
+; CHECK-NEXT: local.tee $push5=, $0=, $pop6
+; CHECK-NEXT: i8x16.shuffle $push3=, $pop7, $pop5, 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
+; CHECK-NEXT: i8x16.shuffle $push2=, $1, $0, 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31
+; CHECK-NEXT: i32x4.add $push4=, $pop3, $pop2
+; CHECK-NEXT: return $pop4
+ %sext = sext <8 x i16> %v to <8 x i32>
+ %1 = mul nsw <8 x i32> %sext, <i32 4096, i32 1, i32 4096, i32 1, i32 4096, i32 1, i32 4096, i32 1>
+ %2 = shufflevector <8 x i32> %1, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %3 = shufflevector <8 x i32> %1, <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ %4 = add <4 x i32> %2, %3
+ ret <4 x i32> %4
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/184007
More information about the llvm-commits
mailing list