[llvm] e4a4bb0 - [PowerPC] Replace vspltisw+vadduwm instructions with xxleqv+vsubuwm for adding the vector {1, 1, 1, 1} (#160882)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 20 22:57:03 PST 2025
Author: Himadhith
Date: 2025-11-21T12:26:58+05:30
New Revision: e4a4bb0f6d3b3b9f3ccba79202a9074415b7dd71
URL: https://github.com/llvm/llvm-project/commit/e4a4bb0f6d3b3b9f3ccba79202a9074415b7dd71
DIFF: https://github.com/llvm/llvm-project/commit/e4a4bb0f6d3b3b9f3ccba79202a9074415b7dd71.diff
LOG: [PowerPC] Replace vspltisw+vadduwm instructions with xxleqv+vsubuwm for adding the vector {1, 1, 1, 1} (#160882)
This patch optimizes vector additions whose addend is a splat of 1 (every
element equal to 1) by materializing a vector of -1s (using `xxleqv`), which
is cheaper than materializing a vector of 1s (using `vspltisw`), and then
subtracting it. The affected vector types and patterns are:
`v2i64`: **`A + vector {1, 1}`**
`v4i32`: **`A + vector {1, 1, 1, 1}`**
`v8i16`: **`A + vector {1, 1, 1, 1, 1, 1, 1, 1}`**
`v16i8`: **`A + vector {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1}`**
The optimized version replaces `vspltisw (4 cycles)` with `xxleqv (2
cycles)` using the following identity:
`A - (-1) = A + 1`.
---------
Co-authored-by: himadhith <himadhith.v at ibm.com>
Co-authored-by: Tony Varghese <tonypalampalliyil at gmail.com>
Added:
Modified:
llvm/lib/Target/PowerPC/PPCISelLowering.cpp
llvm/test/CodeGen/PowerPC/addition-vector-all-ones.ll
llvm/test/CodeGen/PowerPC/vec_add_sub_doubleword.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index cc010c05e5aa2..9283ec1dff557 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -19280,6 +19280,49 @@ static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG,
return MatPCRel;
}
+// Transform (add X, (build_vector (T 1), (T 1), ...)) -> (sub X, (XXLEQVOnes))
+// XXLEQVOnes is a vector with every bit set (each element is -1), via xxleqv
+// Mathematical identity: X + 1 = X - (-1)
+// Applies to v4i32, v2i64, v8i16, v16i8 when every RHS element is constant 1
+// Requires VSX (for xxleqv); returns an empty SDValue when nothing matches
+static SDValue combineADDToSUB(SDNode *N, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) {
+
+ EVT VT = N->getValueType(0);
+ if (!Subtarget.hasVSX())
+ return SDValue();
+
+ // Only the integer vector types v2i64, v4i32, v8i16 and v16i8 are handled
+ if (!(VT == MVT::v8i16 || VT == MVT::v16i8 || VT == MVT::v4i32 ||
+ VT == MVT::v2i64))
+ return SDValue();
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+
+ // Only the second operand is inspected; it must be a BUILD_VECTOR node
+ if (RHS.getOpcode() != ISD::BUILD_VECTOR)
+ return SDValue();
+
+ // Bail out unless every operand is a ConstantSDNode whose value is exactly 1
+ unsigned NumOfEles = RHS.getNumOperands();
+ for (unsigned i = 0; i < NumOfEles; ++i) {
+ auto *CN = dyn_cast<ConstantSDNode>(RHS.getOperand(i));
+ if (!CN || CN->getSExtValue() != 1)
+ return SDValue();
+ }
+ SDLoc DL(N);
+
+ SDValue MinusOne = DAG.getConstant(APInt::getAllOnes(32), DL, MVT::i32);
+ SmallVector<SDValue, 4> Ops(4, MinusOne);
+ SDValue AllOnesVec = DAG.getBuildVector(MVT::v4i32, DL, Ops);
+
+ // The -1 vector is always built as v4i32; bitcast it to the add's type VT
+ SDValue Bitcast = DAG.getNode(ISD::BITCAST, DL, VT, AllOnesVec);
+
+ return DAG.getNode(ISD::SUB, DL, VT, LHS, Bitcast);
+}
+
SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
return Value;
@@ -19287,6 +19330,8 @@ SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
if (auto Value = combineADDToMAT_PCREL_ADDR(N, DCI.DAG, Subtarget))
return Value;
+ if (auto Value = combineADDToSUB(N, DCI.DAG, Subtarget))
+ return Value;
return SDValue();
}
diff --git a/llvm/test/CodeGen/PowerPC/addition-vector-all-ones.ll b/llvm/test/CodeGen/PowerPC/addition-vector-all-ones.ll
index e67d031b1813f..4ec54fa8a0dee 100644
--- a/llvm/test/CodeGen/PowerPC/addition-vector-all-ones.ll
+++ b/llvm/test/CodeGen/PowerPC/addition-vector-all-ones.ll
@@ -8,15 +8,14 @@
; RUN: llc -verify-machineinstrs -O3 -mcpu=pwr9 -mtriple=powerpc-ibm-aix \
; RUN: -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s
-; The addition of vector `A` with vector of 1s currently uses `vspltisw` to generate vector of 1s followed by add operation.
+; Optimized version, which uses `xxleqv` to generate a vector of -1s and a `vsubu*` subtract to leverage the identity A - (-1) = A + 1.
; Function for the vector type v2i64 `a + {1, 1}`
define <2 x i64> @test_v2i64(<2 x i64> %a) {
; CHECK-LABEL: test_v2i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vspltisw v3, 1
-; CHECK-NEXT: vupklsw v3, v3
-; CHECK-NEXT: vaddudm v2, v2, v3
+; CHECK-NEXT: xxleqv v3, v3, v3
+; CHECK-NEXT: vsubudm v2, v2, v3
; CHECK-NEXT: blr
entry:
%add = add <2 x i64> %a, splat (i64 1)
@@ -27,8 +26,8 @@ entry:
define <4 x i32> @test_v4i32(<4 x i32> %a) {
; CHECK-LABEL: test_v4i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vspltisw v3, 1
-; CHECK-NEXT: vadduwm v2, v2, v3
+; CHECK-NEXT: xxleqv v3, v3, v3
+; CHECK-NEXT: vsubuwm v2, v2, v3
; CHECK-NEXT: blr
entry:
%add = add <4 x i32> %a, splat (i32 1)
@@ -39,8 +38,8 @@ entry:
define <8 x i16> @test_v8i16(<8 x i16> %a) {
; CHECK-LABEL: test_v8i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vspltish v3, 1
-; CHECK-NEXT: vadduhm v2, v2, v3
+; CHECK-NEXT: xxleqv v3, v3, v3
+; CHECK-NEXT: vsubuhm v2, v2, v3
; CHECK-NEXT: blr
entry:
%add = add <8 x i16> %a, splat (i16 1)
@@ -51,8 +50,8 @@ entry:
define <16 x i8> @test_16i8(<16 x i8> %a) {
; CHECK-LABEL: test_16i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xxspltib v3, 1
-; CHECK-NEXT: vaddubm v2, v2, v3
+; CHECK-NEXT: xxleqv v3, v3, v3
+; CHECK-NEXT: vsububm v2, v2, v3
; CHECK-NEXT: blr
entry:
%add = add <16 x i8> %a, splat (i8 1)
diff --git a/llvm/test/CodeGen/PowerPC/vec_add_sub_doubleword.ll b/llvm/test/CodeGen/PowerPC/vec_add_sub_doubleword.ll
index 210aee13486c3..d56b1be539b05 100644
--- a/llvm/test/CodeGen/PowerPC/vec_add_sub_doubleword.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_add_sub_doubleword.ll
@@ -16,9 +16,8 @@ define <2 x i64> @test_add(<2 x i64> %x, <2 x i64> %y) nounwind {
define <2 x i64> @increment_by_one(<2 x i64> %x) nounwind {
; VSX-LABEL: increment_by_one:
; VSX: # %bb.0:
-; VSX-NEXT: vspltisw 3, 1
-; VSX-NEXT: vupklsw 3, 3
-; VSX-NEXT: vaddudm 2, 2, 3
+; VSX-NEXT: xxleqv 35, 35, 35
+; VSX-NEXT: vsubudm 2, 2, 3
; VSX-NEXT: blr
;
; NOVSX-LABEL: increment_by_one:
More information about the llvm-commits
mailing list