[llvm] [AArch64] Optimize vector multiplications by certain constants for v2i64 (PR #183827)
Aly ElAshram via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 25 13:56:37 PDT 2026
https://github.com/AlyElashram updated https://github.com/llvm/llvm-project/pull/183827
>From 825a454ad3788d17831361fc5e02d51181186355 Mon Sep 17 00:00:00 2001
From: AlyElashram <alyahelashram at gmail.com>
Date: Fri, 27 Feb 2026 22:21:34 +0200
Subject: [PATCH 1/9] Optimize mul by a splat constant of the form 2^n +/- 1
into a shl followed by an add/sub
---
.../Target/AArch64/AArch64ISelLowering.cpp | 42 ++++++++++++++++
llvm/test/CodeGen/AArch64/neon-mul-shl.ll | 48 +++++++++++++++++++
2 files changed, 90 insertions(+)
create mode 100644 llvm/test/CodeGen/AArch64/neon-mul-shl.ll
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index eb6e9146e3839..0b64a201efde5 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -20434,6 +20434,44 @@ static SDValue performVectorExtCombine(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
+// Transform mul<v2i64, splat(2^n +-1)> into a SHL and ADD/SUB
+// this transormation is much faster when vector mul is not supported
+static SDValue convertMulToShlAdd(SDNode *N, SelectionDAG &DAG) {
+ const SDNode *Operand = N->getOperand(1).getNode();
+ APInt SplatValue;
+ ISD::isConstantSplatVector(Operand, SplatValue);
+
+ // Not a constant splat so should just stay as a mulitplcation operation
+ if (!SplatValue.getBoolValue())
+ return SDValue();
+
+ // If (Value - 1) is a power of 2, we need an ADD (e.g., 257)
+ bool NeedsAdd = (SplatValue - 1).isPowerOf2();
+ bool NeedsSub = (SplatValue + 1).isPowerOf2();
+
+ // If the constant is not (2^n + 1) or (2^n - 1), it would require
+ // more than one addition/subtraction. For v2i64, the cost of
+ // multiple vector adds/shifts often exceeds the cost of
+ // scalarization (moving to GPRs to use a single MUL).
+ if (!NeedsSub && !NeedsAdd)
+ return SDValue();
+
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+ SDValue LHS = N->getOperand(0);
+
+ unsigned ShiftAmt =
+ NeedsAdd ? (SplatValue - 1).logBase2() : (SplatValue + 1).logBase2();
+ SDValue VecShiftAmt = DAG.getConstant(ShiftAmt, DL, VT);
+ SDValue ShiftNode = DAG.getNode(ISD::SHL, DL, VT, LHS, VecShiftAmt);
+
+ // Emit: (LHS << ShiftAmt) +- LHS
+ if (NeedsAdd) {
+ return DAG.getNode(ISD::ADD, DL, VT, ShiftNode, LHS);
+ }
+ return DAG.getNode(ISD::SUB, DL, VT, ShiftNode, LHS);
+}
+
static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
@@ -20444,6 +20482,10 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
return Ext;
if (SDValue Ext = performVectorExtCombine(N, DAG))
return Ext;
+ if(Subtarget->isNeonAvailable()) {
+ if (SDValue Ext = convertMulToShlAdd(N, DAG))
+ return Ext;
+ }
if (DCI.isBeforeLegalizeOps())
return SDValue();
diff --git a/llvm/test/CodeGen/AArch64/neon-mul-shl.ll b/llvm/test/CodeGen/AArch64/neon-mul-shl.ll
new file mode 100644
index 0000000000000..69890260b840e
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/neon-mul-shl.ll
@@ -0,0 +1,48 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64 -mattr=+neon | FileCheck %s
+
+define <2 x i64> @mul_v2i64_257(<2 x i64> %v) {
+; CHECK-LABEL: mul_v2i64_257:
+; CHECK: // %bb.0:
+; CHECK-NEXT: shl v1.2d, v0.2d, #8
+; CHECK-NEXT: add v0.2d, v1.2d, v0.2d
+; CHECK-NEXT: ret
+ %mul = mul <2 x i64> %v, splat (i64 257)
+ ret <2 x i64> %mul
+}
+
+define <2 x i64> @mul_v2i64_255(<2 x i64> %v) {
+; CHECK-LABEL: mul_v2i64_255:
+; CHECK: // %bb.0:
+; CHECK-NEXT: shl v1.2d, v0.2d, #8
+; CHECK-NEXT: sub v0.2d, v1.2d, v0.2d
+; CHECK-NEXT: ret
+ %mul = mul <2 x i64> %v, splat (i64 255)
+ ret <2 x i64> %mul
+}
+
+define <2 x i64> @mul_v2i64_9(<2 x i64> %v) {
+; CHECK-LABEL: mul_v2i64_9:
+; CHECK: // %bb.0:
+; CHECK-NEXT: shl v1.2d, v0.2d, #3
+; CHECK-NEXT: add v0.2d, v1.2d, v0.2d
+; CHECK-NEXT: ret
+ %mul = mul <2 x i64> %v, splat (i64 9)
+ ret <2 x i64> %mul
+}
+
+;; This should not be optimized
+define <2 x i64> @mul_v2i64_13_no_opt(<2 x i64> %v) {
+; CHECK-LABEL: mul_v2i64_13_no_opt:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov x10, d0
+; CHECK-NEXT: mov w8, #13 // =0xd
+; CHECK-NEXT: mov x9, v0.d[1]
+; CHECK-NEXT: mul x10, x10, x8
+; CHECK-NEXT: mul x8, x9, x8
+; CHECK-NEXT: fmov d0, x10
+; CHECK-NEXT: mov v0.d[1], x8
+; CHECK-NEXT: ret
+ %mul = mul <2 x i64> %v, splat (i64 13)
+ ret <2 x i64> %mul
+}
>From 11940e41cd33e55ba8200bb6d53db9d34ffe4fa4 Mon Sep 17 00:00:00 2001
From: AlyElashram <alyahelashram at gmail.com>
Date: Fri, 27 Feb 2026 22:23:39 +0200
Subject: [PATCH 2/9] Add missed vector size check
---
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 0b64a201efde5..fe18c853138b0 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -20482,7 +20482,7 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
return Ext;
if (SDValue Ext = performVectorExtCombine(N, DAG))
return Ext;
- if(Subtarget->isNeonAvailable()) {
+ if(Subtarget->isNeonAvailable() && N->getValueType(0) == MVT::v2i64) {
if (SDValue Ext = convertMulToShlAdd(N, DAG))
return Ext;
}
>From 8fa911c96875a1c1464a54924aaa3eeb69f7dd03 Mon Sep 17 00:00:00 2001
From: AlyElashram <alyahelashram at gmail.com>
Date: Fri, 27 Feb 2026 22:28:58 +0200
Subject: [PATCH 3/9] Formatting
---
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index fe18c853138b0..a47db2a010892 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -20482,7 +20482,7 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
return Ext;
if (SDValue Ext = performVectorExtCombine(N, DAG))
return Ext;
- if(Subtarget->isNeonAvailable() && N->getValueType(0) == MVT::v2i64) {
+ if (Subtarget->isNeonAvailable() && N->getValueType(0) == MVT::v2i64) {
if (SDValue Ext = convertMulToShlAdd(N, DAG))
return Ext;
}
>From 9afa4be127dee034a10e7ce6156766230b119f48 Mon Sep 17 00:00:00 2001
From: AlyElashram <alyahelashram at gmail.com>
Date: Thu, 5 Mar 2026 20:02:44 +0200
Subject: [PATCH 4/9] Update the optimization so that it happens after the
s/umull selection
---
.../Target/AArch64/AArch64ISelLowering.cpp | 84 +++++++++----------
1 file changed, 40 insertions(+), 44 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index a47db2a010892..c56c9c7aacc1b 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -5919,6 +5919,44 @@ static unsigned selectUmullSmull(SDValue &N0, SDValue &N1, SelectionDAG &DAG,
return 0;
}
+// Transform mul<v2i64, splat(2^n +-1)> into a SHL and ADD/SUB
+// this transormation is much faster when vector mul is not supported
+static SDValue convertMulToShlAdd(SDNode *N, SelectionDAG &DAG) {
+ const SDNode *Operand = N->getOperand(1).getNode();
+ APInt SplatValue;
+ ISD::isConstantSplatVector(Operand, SplatValue);
+
+ // Not a constant splat so should just stay as a mulitplcation operation
+ if (!SplatValue.getBoolValue())
+ return SDValue();
+
+ // If (Value - 1) is a power of 2, we need an ADD (e.g., 257)
+ bool NeedsAdd = (SplatValue - 1).isPowerOf2();
+ bool NeedsSub = (SplatValue + 1).isPowerOf2();
+
+ // If the constant is not (2^n + 1) or (2^n - 1), it would require
+ // more than one addition/subtraction. For v2i64, the cost of
+ // multiple vector adds/shifts often exceeds the cost of
+ // scalarization (moving to GPRs to use a single MUL).
+ if (!NeedsSub && !NeedsAdd)
+ return SDValue();
+
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+ SDValue LHS = N->getOperand(0);
+
+ unsigned ShiftAmt =
+ NeedsAdd ? (SplatValue - 1).logBase2() : (SplatValue + 1).logBase2();
+ SDValue VecShiftAmt = DAG.getConstant(ShiftAmt, DL, VT);
+ SDValue ShiftNode = DAG.getNode(ISD::SHL, DL, VT, LHS, VecShiftAmt);
+
+ // Emit: (LHS << ShiftAmt) +- LHS
+ if (NeedsAdd) {
+ return DAG.getNode(ISD::ADD, DL, VT, ShiftNode, LHS);
+ }
+ return DAG.getNode(ISD::SUB, DL, VT, ShiftNode, LHS);
+}
+
SDValue AArch64TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
@@ -5965,7 +6003,8 @@ SDValue AArch64TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
// legal.
if (Subtarget->hasSVE())
return LowerToPredicatedOp(Op, DAG, AArch64ISD::MUL_PRED);
- // Fall through to expand this. It is not legal.
+ if (SDValue ShlAdd = convertMulToShlAdd(Op.getNode(), DAG))
+ return ShlAdd;
return SDValue();
} else
// Other vector multiplications are legal.
@@ -20434,44 +20473,6 @@ static SDValue performVectorExtCombine(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
-// Transform mul<v2i64, splat(2^n +-1)> into a SHL and ADD/SUB
-// this transormation is much faster when vector mul is not supported
-static SDValue convertMulToShlAdd(SDNode *N, SelectionDAG &DAG) {
- const SDNode *Operand = N->getOperand(1).getNode();
- APInt SplatValue;
- ISD::isConstantSplatVector(Operand, SplatValue);
-
- // Not a constant splat so should just stay as a mulitplcation operation
- if (!SplatValue.getBoolValue())
- return SDValue();
-
- // If (Value - 1) is a power of 2, we need an ADD (e.g., 257)
- bool NeedsAdd = (SplatValue - 1).isPowerOf2();
- bool NeedsSub = (SplatValue + 1).isPowerOf2();
-
- // If the constant is not (2^n + 1) or (2^n - 1), it would require
- // more than one addition/subtraction. For v2i64, the cost of
- // multiple vector adds/shifts often exceeds the cost of
- // scalarization (moving to GPRs to use a single MUL).
- if (!NeedsSub && !NeedsAdd)
- return SDValue();
-
- SDLoc DL(N);
- EVT VT = N->getValueType(0);
- SDValue LHS = N->getOperand(0);
-
- unsigned ShiftAmt =
- NeedsAdd ? (SplatValue - 1).logBase2() : (SplatValue + 1).logBase2();
- SDValue VecShiftAmt = DAG.getConstant(ShiftAmt, DL, VT);
- SDValue ShiftNode = DAG.getNode(ISD::SHL, DL, VT, LHS, VecShiftAmt);
-
- // Emit: (LHS << ShiftAmt) +- LHS
- if (NeedsAdd) {
- return DAG.getNode(ISD::ADD, DL, VT, ShiftNode, LHS);
- }
- return DAG.getNode(ISD::SUB, DL, VT, ShiftNode, LHS);
-}
-
static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
@@ -20482,11 +20483,6 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
return Ext;
if (SDValue Ext = performVectorExtCombine(N, DAG))
return Ext;
- if (Subtarget->isNeonAvailable() && N->getValueType(0) == MVT::v2i64) {
- if (SDValue Ext = convertMulToShlAdd(N, DAG))
- return Ext;
- }
-
if (DCI.isBeforeLegalizeOps())
return SDValue();
>From 46a09cb7ae6881bf8bdedbd2bcc756b6696dd6ca Mon Sep 17 00:00:00 2001
From: AlyElashram <alyahelashram at gmail.com>
Date: Sat, 7 Mar 2026 00:59:26 +0200
Subject: [PATCH 5/9] Address PR Comments
---
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 15 +++++++--------
1 file changed, 7 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index c56c9c7aacc1b..f1c8ad24826d2 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -5920,14 +5920,14 @@ static unsigned selectUmullSmull(SDValue &N0, SDValue &N1, SelectionDAG &DAG,
}
// Transform mul<v2i64, splat(2^n +-1)> into a SHL and ADD/SUB
-// this transormation is much faster when vector mul is not supported
+// this transformation is much faster when vector mul is not supported
static SDValue convertMulToShlAdd(SDNode *N, SelectionDAG &DAG) {
const SDNode *Operand = N->getOperand(1).getNode();
APInt SplatValue;
- ISD::isConstantSplatVector(Operand, SplatValue);
- // Not a constant splat so should just stay as a mulitplcation operation
- if (!SplatValue.getBoolValue())
+ // Not a constant splat so should just stay as a multiplication operation
+ if (!ISD::isConstantSplatVector(Operand, SplatValue) ||
+ !SplatValue.getBoolValue())
return SDValue();
// If (Value - 1) is a power of 2, we need an ADD (e.g., 257)
@@ -5951,10 +5951,7 @@ static SDValue convertMulToShlAdd(SDNode *N, SelectionDAG &DAG) {
SDValue ShiftNode = DAG.getNode(ISD::SHL, DL, VT, LHS, VecShiftAmt);
// Emit: (LHS << ShiftAmt) +- LHS
- if (NeedsAdd) {
- return DAG.getNode(ISD::ADD, DL, VT, ShiftNode, LHS);
- }
- return DAG.getNode(ISD::SUB, DL, VT, ShiftNode, LHS);
+ return DAG.getNode(NeedsAdd ? ISD::ADD : ISD::SUB, DL, VT, ShiftNode, LHS);
}
SDValue AArch64TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
@@ -6003,8 +6000,10 @@ SDValue AArch64TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
// legal.
if (Subtarget->hasSVE())
return LowerToPredicatedOp(Op, DAG, AArch64ISD::MUL_PRED);
+ // Try to optimize the mul to a shift left and add instead of scalarizing.
if (SDValue ShlAdd = convertMulToShlAdd(Op.getNode(), DAG))
return ShlAdd;
+ // Fall through to expanding as the mul is not legal.
return SDValue();
} else
// Other vector multiplications are legal.
>From 01505b3614e75844376da3f032e533bf4cd154dc Mon Sep 17 00:00:00 2001
From: AlyElashram <alyahelashram at gmail.com>
Date: Sun, 15 Mar 2026 20:01:53 +0200
Subject: [PATCH 6/9] Add the two-SHL optimization for constants that split
into a sum or difference of two powers of two (2^N +/- 2^M)
---
.../Target/AArch64/AArch64ISelLowering.cpp | 72 ++++++++++++++-----
1 file changed, 53 insertions(+), 19 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index f1c8ad24826d2..0a41e4246745c 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -5919,8 +5919,21 @@ static unsigned selectUmullSmull(SDValue &N0, SDValue &N1, SelectionDAG &DAG,
return 0;
}
-// Transform mul<v2i64, splat(2^n +-1)> into a SHL and ADD/SUB
-// this transformation is much faster when vector mul is not supported
+// Transform mul<v2i64, splat(const)> into a SHL and ADD/SUB
+// (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub.
+// mul x, (2^N + 1) --> add (shl x, N), x
+// mul x, (2^N - 1) --> sub (shl x, N), x
+// Examples: x * 33 --> (x << 5) + x
+// x * 15 --> (x << 4) - x
+// x * -33 --> -((x << 5) + x)
+// x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
+// (2) multiply-by-(power-of-2 +/- power-of-2) into shifts and add/sub.
+// mul x, (2^N + 2^M) --> (add (shl x, N), (shl x, M))
+// mul x, (2^N - 2^M) --> (sub (shl x, N), (shl x, M))
+// Examples: x * 0x8800 --> (x << 15) + (x << 11)
+// x * 0xf800 --> (x << 16) - (x << 11)
+// x * -0x8800 --> -((x << 15) + (x << 11))
+// x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16)
static SDValue convertMulToShlAdd(SDNode *N, SelectionDAG &DAG) {
const SDNode *Operand = N->getOperand(1).getNode();
APInt SplatValue;
@@ -5930,28 +5943,49 @@ static SDValue convertMulToShlAdd(SDNode *N, SelectionDAG &DAG) {
!SplatValue.getBoolValue())
return SDValue();
- // If (Value - 1) is a power of 2, we need an ADD (e.g., 257)
- bool NeedsAdd = (SplatValue - 1).isPowerOf2();
- bool NeedsSub = (SplatValue + 1).isPowerOf2();
+ bool IsNegative = SplatValue.isNegative();
+ SplatValue = SplatValue.abs();
+ // Placeholder for MathOp
+ unsigned MathOp = ISD::DELETED_NODE;
+ // The constant `2` should be treated as (2^0 + 1).
+ unsigned TZeros = SplatValue == 2 ? 0 : SplatValue.countr_zero();
+
+ // Shift the splat value by all the zeros , this won't affect the parrity
+ // this will help us find the first and second multiple to use.
+ SplatValue.lshrInPlace(TZeros);
+
+ if ((SplatValue - 1).isPowerOf2())
+ MathOp = ISD::ADD;
+ else if ((SplatValue + 1).isPowerOf2())
+ MathOp = ISD::SUB;
// If the constant is not (2^n + 1) or (2^n - 1), it would require
// more than one addition/subtraction. For v2i64, the cost of
// multiple vector adds/shifts often exceeds the cost of
// scalarization (moving to GPRs to use a single MUL).
- if (!NeedsSub && !NeedsAdd)
- return SDValue();
-
- SDLoc DL(N);
- EVT VT = N->getValueType(0);
- SDValue LHS = N->getOperand(0);
-
- unsigned ShiftAmt =
- NeedsAdd ? (SplatValue - 1).logBase2() : (SplatValue + 1).logBase2();
- SDValue VecShiftAmt = DAG.getConstant(ShiftAmt, DL, VT);
- SDValue ShiftNode = DAG.getNode(ISD::SHL, DL, VT, LHS, VecShiftAmt);
-
- // Emit: (LHS << ShiftAmt) +- LHS
- return DAG.getNode(NeedsAdd ? ISD::ADD : ISD::SUB, DL, VT, ShiftNode, LHS);
+ if (MathOp != ISD::DELETED_NODE) {
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+ SDValue LHS = N->getOperand(0);
+
+ unsigned ShiftAmt = MathOp == ISD::ADD ? (SplatValue - 1).logBase2()
+ : (SplatValue + 1).logBase2();
+ ShiftAmt += TZeros;
+
+ // assert(ShiftAmt < BitWidth &&
+ // "multiply-by-constant generated out of bounds shift");
+ SDValue Shl =
+ DAG.getNode(ISD::SHL, DL, VT, LHS, DAG.getConstant(ShiftAmt, DL, VT));
+ SDValue Combined =
+ TZeros ? DAG.getNode(MathOp, DL, VT, Shl,
+ DAG.getNode(ISD::SHL, DL, VT, LHS,
+ DAG.getConstant(TZeros, DL, VT)))
+ : DAG.getNode(MathOp, DL, VT, Shl, LHS);
+ if (IsNegative)
+ Combined = DAG.getNegative(Combined, DL, VT);
+ return Combined;
+ }
+ return SDValue();
}
SDValue AArch64TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
>From 5ba22f58408841d240180994c6c49031ff7d4af4 Mon Sep 17 00:00:00 2001
From: AlyElashram <alyahelashram at gmail.com>
Date: Sun, 15 Mar 2026 20:08:15 +0200
Subject: [PATCH 7/9] Amend the tests to cover the new cases as well as the
negatives.
---
llvm/test/CodeGen/AArch64/neon-mul-shl.ll | 52 +++++++++++++++++++++++
1 file changed, 52 insertions(+)
diff --git a/llvm/test/CodeGen/AArch64/neon-mul-shl.ll b/llvm/test/CodeGen/AArch64/neon-mul-shl.ll
index 69890260b840e..95d1da84ff5d4 100644
--- a/llvm/test/CodeGen/AArch64/neon-mul-shl.ll
+++ b/llvm/test/CodeGen/AArch64/neon-mul-shl.ll
@@ -31,6 +31,58 @@ define <2 x i64> @mul_v2i64_9(<2 x i64> %v) {
ret <2 x i64> %mul
}
+define <2 x i64> @mul_v2i64_neg_33(<2 x i64> %v) {
+; CHECK-LABEL: mul_v2i64_neg_33:
+; CHECK: // %bb.0:
+; CHECK-NEXT: shl v1.2d, v0.2d, #5
+; CHECK-NEXT: add v0.2d, v1.2d, v0.2d
+; CHECK-NEXT: neg v0.2d, v0.2d
+; CHECK-NEXT: ret
+ %mul = mul <2 x i64> %v, splat (i64 -33)
+ ret <2 x i64> %mul
+}
+
+define <2 x i64> @mul_v2i64_neg_15(<2 x i64> %v) {
+; CHECK-LABEL: mul_v2i64_neg_15:
+; CHECK: // %bb.0:
+; CHECK-NEXT: shl v1.2d, v0.2d, #4
+; CHECK-NEXT: sub v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: ret
+ %mul = mul <2 x i64> %v, splat (i64 -15)
+ ret <2 x i64> %mul
+}
+define <2 x i64> @mul_v2i64_8800(<2 x i64> %v) {
+; CHECK-LABEL: mul_v2i64_8800:
+; CHECK: // %bb.0:
+; CHECK-NEXT: shl v1.2d, v0.2d, #11
+; CHECK-NEXT: shl v0.2d, v0.2d, #15
+; CHECK-NEXT: add v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: ret
+ %mul = mul <2 x i64> %v, splat (i64 34816) ; 0x8800
+ ret <2 x i64> %mul
+}
+
+define <2 x i64> @mul_v2i64_f800(<2 x i64> %v) {
+; CHECK-LABEL: mul_v2i64_f800:
+; CHECK: // %bb.0:
+; CHECK-NEXT: shl v1.2d, v0.2d, #11
+; CHECK-NEXT: shl v0.2d, v0.2d, #16
+; CHECK-NEXT: sub v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: ret
+ %mul = mul <2 x i64> %v, splat (i64 63488) ; 0xf800
+ ret <2 x i64> %mul
+}
+
+;; A power-of-two multiply should stay a single shl (no extra add/sub expected)
+define <2 x i64> @mul_v2i64_256(<2 x i64> %v) {
+; CHECK-LABEL: mul_v2i64_256:
+; CHECK: // %bb.0:
+; CHECK-NEXT: shl v0.2d, v0.2d, #8
+; CHECK-NEXT: ret
+ %mul = mul <2 x i64> %v, splat (i64 256)
+ ret <2 x i64> %mul
+}
+
;; This should not be optimized
define <2 x i64> @mul_v2i64_13_no_opt(<2 x i64> %v) {
; CHECK-LABEL: mul_v2i64_13_no_opt:
>From deaaad3377dec55d5f1f18efcfdebf6bab0d90fa Mon Sep 17 00:00:00 2001
From: AlyElashram <alyahelashram at gmail.com>
Date: Tue, 17 Mar 2026 02:25:17 +0200
Subject: [PATCH 8/9] Address PR Comments
---
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 16 +++++++---------
1 file changed, 7 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 0a41e4246745c..53eca84591ed6 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -5947,10 +5947,9 @@ static SDValue convertMulToShlAdd(SDNode *N, SelectionDAG &DAG) {
SplatValue = SplatValue.abs();
// Placeholder for MathOp
unsigned MathOp = ISD::DELETED_NODE;
- // The constant `2` should be treated as (2^0 + 1).
- unsigned TZeros = SplatValue == 2 ? 0 : SplatValue.countr_zero();
+ unsigned TZeros = SplatValue.countr_zero();
- // Shift the splat value by all the zeros , this won't affect the parrity
+ // Shift the splat value by all the zeros, this won't affect the parrity
// this will help us find the first and second multiple to use.
SplatValue.lshrInPlace(TZeros);
@@ -5972,15 +5971,14 @@ static SDValue convertMulToShlAdd(SDNode *N, SelectionDAG &DAG) {
: (SplatValue + 1).logBase2();
ShiftAmt += TZeros;
- // assert(ShiftAmt < BitWidth &&
- // "multiply-by-constant generated out of bounds shift");
SDValue Shl =
DAG.getNode(ISD::SHL, DL, VT, LHS, DAG.getConstant(ShiftAmt, DL, VT));
+
+ SDValue DoubleShl = DAG.getNode(
+ MathOp, DL, VT, Shl,
+ DAG.getNode(ISD::SHL, DL, VT, LHS, DAG.getConstant(TZeros, DL, VT)));
SDValue Combined =
- TZeros ? DAG.getNode(MathOp, DL, VT, Shl,
- DAG.getNode(ISD::SHL, DL, VT, LHS,
- DAG.getConstant(TZeros, DL, VT)))
- : DAG.getNode(MathOp, DL, VT, Shl, LHS);
+ TZeros ? DoubleShl : DAG.getNode(MathOp, DL, VT, Shl, LHS);
if (IsNegative)
Combined = DAG.getNegative(Combined, DL, VT);
return Combined;
>From ed41fe2e7aa6bfc1a28e1f2682d5a3ecc5a564f9 Mon Sep 17 00:00:00 2001
From: AlyElashram <alyahelashram at gmail.com>
Date: Wed, 25 Mar 2026 22:55:21 +0200
Subject: [PATCH 9/9] Format as newLHS and combined
---
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 11 +++++------
1 file changed, 5 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 9a49309212121..8602422e35974 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -5974,7 +5974,7 @@ static SDValue convertMulToShlAdd(SDNode *N, SelectionDAG &DAG) {
unsigned MathOp = ISD::DELETED_NODE;
unsigned TZeros = SplatValue.countr_zero();
- // Shift the splat value by all the zeros, this won't affect the parrity
+ // Shift the splat value by all the zeros, this won't affect the parity
// this will help us find the first and second multiple to use.
SplatValue.lshrInPlace(TZeros);
@@ -5999,11 +5999,10 @@ static SDValue convertMulToShlAdd(SDNode *N, SelectionDAG &DAG) {
SDValue Shl =
DAG.getNode(ISD::SHL, DL, VT, LHS, DAG.getConstant(ShiftAmt, DL, VT));
- SDValue DoubleShl = DAG.getNode(
- MathOp, DL, VT, Shl,
- DAG.getNode(ISD::SHL, DL, VT, LHS, DAG.getConstant(TZeros, DL, VT)));
- SDValue Combined =
- TZeros ? DoubleShl : DAG.getNode(MathOp, DL, VT, Shl, LHS);
+ SDValue NewLHS = TZeros ? DAG.getNode(ISD::SHL, DL, VT, LHS,
+ DAG.getConstant(TZeros, DL, VT))
+ : LHS;
+ SDValue Combined = DAG.getNode(MathOp, DL, VT, Shl, NewLHS);
if (IsNegative)
Combined = DAG.getNegative(Combined, DL, VT);
return Combined;
More information about the llvm-commits
mailing list