[llvm] [DAGCombiner] Allow promoted constants in MULHU by power-of-2 -> SRL transform (PR #170562)
Valeriy Savchenko via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 3 13:43:17 PST 2025
https://github.com/SavchenkoValeriy updated https://github.com/llvm/llvm-project/pull/170562
>From 19e0f694230e9350c043daa8118f44fae17a3c8d Mon Sep 17 00:00:00 2001
From: Valeriy Savchenko <vsavchenko at apple.com>
Date: Wed, 3 Dec 2025 21:33:15 +0000
Subject: [PATCH 1/2] [AArch64][NFC] Add test for missed vector mulhu optimization
---
.../CodeGen/AArch64/mulhu-srl-promoted-ops.ll | 35 +++++++++++++++++++
1 file changed, 35 insertions(+)
create mode 100644 llvm/test/CodeGen/AArch64/mulhu-srl-promoted-ops.ll
diff --git a/llvm/test/CodeGen/AArch64/mulhu-srl-promoted-ops.ll b/llvm/test/CodeGen/AArch64/mulhu-srl-promoted-ops.ll
new file mode 100644
index 0000000000000..3b483c833ce3e
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/mulhu-srl-promoted-ops.ll
@@ -0,0 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
+
+define <8 x i16> @mulhu_v8i16_by_256(<8 x i16> %x) {
+; CHECK-LABEL: mulhu_v8i16_by_256:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v1.8h, #1, lsl #8
+; CHECK-NEXT: umull2 v2.4s, v0.8h, v1.8h
+; CHECK-NEXT: umull v0.4s, v0.4h, v1.4h
+; CHECK-NEXT: uzp2 v0.8h, v0.8h, v2.8h
+; CHECK-NEXT: ret
+ %x32 = zext <8 x i16> %x to <8 x i32>
+ %mul = mul <8 x i32> %x32, splat (i32 256)
+ %result = lshr <8 x i32> %mul, splat (i32 16)
+ %trunc = trunc <8 x i32> %result to <8 x i16>
+ ret <8 x i16> %trunc
+}
+
+define <16 x i16> @mulhu_v16i16_by_256(<16 x i16> %x) {
+; CHECK-LABEL: mulhu_v16i16_by_256:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v2.8h, #1, lsl #8
+; CHECK-NEXT: umull2 v3.4s, v0.8h, v2.8h
+; CHECK-NEXT: umull v0.4s, v0.4h, v2.4h
+; CHECK-NEXT: umull2 v4.4s, v1.8h, v2.8h
+; CHECK-NEXT: umull v1.4s, v1.4h, v2.4h
+; CHECK-NEXT: uzp2 v0.8h, v0.8h, v3.8h
+; CHECK-NEXT: uzp2 v1.8h, v1.8h, v4.8h
+; CHECK-NEXT: ret
+ %x32 = zext <16 x i16> %x to <16 x i32>
+ %mul = mul <16 x i32> %x32, splat (i32 256)
+ %result = lshr <16 x i32> %mul, splat (i32 16)
+ %trunc = trunc <16 x i32> %result to <16 x i16>
+ ret <16 x i16> %trunc
+}
>From 55e24d9fc42f0d9a886b1f7fcb05916c40112f0c Mon Sep 17 00:00:00 2001
From: Valeriy Savchenko <vsavchenko at apple.com>
Date: Wed, 3 Dec 2025 21:33:47 +0000
Subject: [PATCH 2/2] [DAGCombiner] Allow promoted constants in vector mulhu -> shr fold
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 6 +-
.../CodeGen/AArch64/mulhu-srl-promoted-ops.ll | 14 +---
llvm/test/CodeGen/RISCV/rvv/vmulhu-sdnode.ll | 71 ++++++-------------
3 files changed, 30 insertions(+), 61 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 5377f22e5c61f..6af90f1242fe8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -5589,7 +5589,8 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
return DAG.getConstant(0, DL, VT);
// fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
- if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
+ if (isConstantOrConstantVector(N1, /*NoOpaques=*/true,
+ /*AllowTruncation=*/true) &&
hasOperation(ISD::SRL, VT)) {
if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
unsigned NumEltBits = VT.getScalarSizeInBits();
@@ -29833,7 +29834,8 @@ static SDValue takeInexpensiveLog2(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
return false;
};
- if (ISD::matchUnaryPredicate(Op, IsPowerOfTwo)) {
+ if (ISD::matchUnaryPredicate(Op, IsPowerOfTwo, /*AllowUndefs=*/false,
+ /*AllowTruncation=*/true)) {
if (!VT.isVector())
return DAG.getConstant(Pow2Constants.back().logBase2(), DL, VT);
// We need to create a build vector
diff --git a/llvm/test/CodeGen/AArch64/mulhu-srl-promoted-ops.ll b/llvm/test/CodeGen/AArch64/mulhu-srl-promoted-ops.ll
index 3b483c833ce3e..46f994bfba9a2 100644
--- a/llvm/test/CodeGen/AArch64/mulhu-srl-promoted-ops.ll
+++ b/llvm/test/CodeGen/AArch64/mulhu-srl-promoted-ops.ll
@@ -4,10 +4,7 @@
define <8 x i16> @mulhu_v8i16_by_256(<8 x i16> %x) {
; CHECK-LABEL: mulhu_v8i16_by_256:
; CHECK: // %bb.0:
-; CHECK-NEXT: movi v1.8h, #1, lsl #8
-; CHECK-NEXT: umull2 v2.4s, v0.8h, v1.8h
-; CHECK-NEXT: umull v0.4s, v0.4h, v1.4h
-; CHECK-NEXT: uzp2 v0.8h, v0.8h, v2.8h
+; CHECK-NEXT: ushr v0.8h, v0.8h, #8
; CHECK-NEXT: ret
%x32 = zext <8 x i16> %x to <8 x i32>
%mul = mul <8 x i32> %x32, splat (i32 256)
@@ -19,13 +16,8 @@ define <8 x i16> @mulhu_v8i16_by_256(<8 x i16> %x) {
define <16 x i16> @mulhu_v16i16_by_256(<16 x i16> %x) {
; CHECK-LABEL: mulhu_v16i16_by_256:
; CHECK: // %bb.0:
-; CHECK-NEXT: movi v2.8h, #1, lsl #8
-; CHECK-NEXT: umull2 v3.4s, v0.8h, v2.8h
-; CHECK-NEXT: umull v0.4s, v0.4h, v2.4h
-; CHECK-NEXT: umull2 v4.4s, v1.8h, v2.8h
-; CHECK-NEXT: umull v1.4s, v1.4h, v2.4h
-; CHECK-NEXT: uzp2 v0.8h, v0.8h, v3.8h
-; CHECK-NEXT: uzp2 v1.8h, v1.8h, v4.8h
+; CHECK-NEXT: ushr v0.8h, v0.8h, #8
+; CHECK-NEXT: ushr v1.8h, v1.8h, #8
; CHECK-NEXT: ret
%x32 = zext <16 x i16> %x to <16 x i32>
%mul = mul <16 x i32> %x32, splat (i32 256)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmulhu-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vmulhu-sdnode.ll
index 3fd7f5be860cf..c0c9b1797f91f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmulhu-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmulhu-sdnode.ll
@@ -48,18 +48,11 @@ define <vscale x 1 x i32> @vmulhu_vi_nxv1i32_0(<vscale x 1 x i32> %va) {
}
define <vscale x 1 x i32> @vmulhu_vi_nxv1i32_1(<vscale x 1 x i32> %va) {
-; RV32-LABEL: vmulhu_vi_nxv1i32_1:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
-; RV32-NEXT: vsrl.vi v8, v8, 28
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vmulhu_vi_nxv1i32_1:
-; RV64: # %bb.0:
-; RV64-NEXT: li a0, 16
-; RV64-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
-; RV64-NEXT: vmulhu.vx v8, v8, a0
-; RV64-NEXT: ret
+; CHECK-LABEL: vmulhu_vi_nxv1i32_1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vsrl.vi v8, v8, 28
+; CHECK-NEXT: ret
%vb = zext <vscale x 1 x i32> splat (i32 16) to <vscale x 1 x i64>
%vc = zext <vscale x 1 x i32> %va to <vscale x 1 x i64>
%vd = mul <vscale x 1 x i64> %vb, %vc
@@ -114,18 +107,11 @@ define <vscale x 2 x i32> @vmulhu_vi_nxv2i32_0(<vscale x 2 x i32> %va) {
}
define <vscale x 2 x i32> @vmulhu_vi_nxv2i32_1(<vscale x 2 x i32> %va) {
-; RV32-LABEL: vmulhu_vi_nxv2i32_1:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; RV32-NEXT: vsrl.vi v8, v8, 28
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vmulhu_vi_nxv2i32_1:
-; RV64: # %bb.0:
-; RV64-NEXT: li a0, 16
-; RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; RV64-NEXT: vmulhu.vx v8, v8, a0
-; RV64-NEXT: ret
+; CHECK-LABEL: vmulhu_vi_nxv2i32_1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vsrl.vi v8, v8, 28
+; CHECK-NEXT: ret
%vb = zext <vscale x 2 x i32> splat (i32 16) to <vscale x 2 x i64>
%vc = zext <vscale x 2 x i32> %va to <vscale x 2 x i64>
%vd = mul <vscale x 2 x i64> %vb, %vc
@@ -180,18 +166,11 @@ define <vscale x 4 x i32> @vmulhu_vi_nxv4i32_0(<vscale x 4 x i32> %va) {
}
define <vscale x 4 x i32> @vmulhu_vi_nxv4i32_1(<vscale x 4 x i32> %va) {
-; RV32-LABEL: vmulhu_vi_nxv4i32_1:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; RV32-NEXT: vsrl.vi v8, v8, 28
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vmulhu_vi_nxv4i32_1:
-; RV64: # %bb.0:
-; RV64-NEXT: li a0, 16
-; RV64-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; RV64-NEXT: vmulhu.vx v8, v8, a0
-; RV64-NEXT: ret
+; CHECK-LABEL: vmulhu_vi_nxv4i32_1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vsrl.vi v8, v8, 28
+; CHECK-NEXT: ret
%vb = zext <vscale x 4 x i32> splat (i32 16) to <vscale x 4 x i64>
%vc = zext <vscale x 4 x i32> %va to <vscale x 4 x i64>
%vd = mul <vscale x 4 x i64> %vb, %vc
@@ -246,18 +225,11 @@ define <vscale x 8 x i32> @vmulhu_vi_nxv8i32_0(<vscale x 8 x i32> %va) {
}
define <vscale x 8 x i32> @vmulhu_vi_nxv8i32_1(<vscale x 8 x i32> %va) {
-; RV32-LABEL: vmulhu_vi_nxv8i32_1:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; RV32-NEXT: vsrl.vi v8, v8, 28
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vmulhu_vi_nxv8i32_1:
-; RV64: # %bb.0:
-; RV64-NEXT: li a0, 16
-; RV64-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; RV64-NEXT: vmulhu.vx v8, v8, a0
-; RV64-NEXT: ret
+; CHECK-LABEL: vmulhu_vi_nxv8i32_1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-NEXT: vsrl.vi v8, v8, 28
+; CHECK-NEXT: ret
%vb = zext <vscale x 8 x i32> splat (i32 16) to <vscale x 8 x i64>
%vc = zext <vscale x 8 x i32> %va to <vscale x 8 x i64>
%vd = mul <vscale x 8 x i64> %vb, %vc
@@ -265,3 +237,6 @@ define <vscale x 8 x i32> @vmulhu_vi_nxv8i32_1(<vscale x 8 x i32> %va) {
%vf = trunc <vscale x 8 x i64> %ve to <vscale x 8 x i32>
ret <vscale x 8 x i32> %vf
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; RV32: {{.*}}
+; RV64: {{.*}}
More information about the llvm-commits mailing list