[llvm] [SelectionDAG][Darwin] Convert insert 0 to AND with bitmask (PR #142428)
Julian Nagele via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 2 09:41:36 PDT 2025
https://github.com/juliannagele created https://github.com/llvm/llvm-project/pull/142428
This avoids a costly integer to vector register move.
>From acbb7b2842e41e43bcac1fcb19c1e0809ed38bb6 Mon Sep 17 00:00:00 2001
From: Julian Nagele <j.nagele at apple.com>
Date: Mon, 2 Jun 2025 17:31:32 +0100
Subject: [PATCH] [SelectionDAG][Darwin] Convert insert 0 to AND with bitmask
This avoids a costly integer to vector register move.
---
.../Target/AArch64/AArch64ISelLowering.cpp | 60 ++++++++++++++++++-
llvm/test/CodeGen/AArch64/insertzero.ll | 27 +++++++++
.../AArch64/vec-combine-compare-to-bitmask.ll | 11 ++--
3 files changed, 91 insertions(+), 7 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/insertzero.ll
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index ae34e6b7dcc3c..5b5cd49f963b9 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -26057,11 +26057,67 @@ static SDValue removeRedundantInsertVectorElt(SDNode *N) {
return ExtractVec;
}
+// On Darwin, rewrite an INSERT_VECTOR_ELT of constant zero (integer or FP) at a
+// constant index as an AND with a lane bitmask, avoiding a costly move from an
+// integer to a vector register. Returns an empty SDValue if it does not apply.
+static SDValue convertInsertVectorEltToAnd(SDNode *N, SelectionDAG &DAG,
+ const AArch64Subtarget *Subtarget) {
+ assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT && "Unexpected node!");
+
+ if (!Subtarget->isTargetDarwin())
+ return SDValue();
+
+ SDValue InsertVec = N->getOperand(0);
+ SDValue InsertVal = N->getOperand(1);
+ SDValue InsertIdx = N->getOperand(2);
+
+ ConstantSDNode *ConstIdx = dyn_cast<ConstantSDNode>(InsertIdx);
+
+ // Bail out unless the inserted value is constant zero and the index is constant.
+ if (!(isNullConstant(InsertVal) || isNullFPConstant(InsertVal)))
+ return SDValue();
+ if (!ConstIdx)
+ return SDValue();
+
+ unsigned Lane = ConstIdx->getZExtValue();
+
+ EVT VecVT = N->getValueType(0);
+ EVT IntVecVT = VecVT.changeVectorElementTypeToInteger();
+ EVT IntEltVT = IntVecVT.getVectorElementType();
+
+ if (DAG.NewNodesMustHaveLegalTypes)
+ IntEltVT = DAG.getTargetLoweringInfo().getTypeToTransformTo(
+ *DAG.getContext(), IntEltVT);
+
+ unsigned BitWidth = IntEltVT.getSizeInBits();
+ SDLoc DL(N);
+
+ // Reinterpret the source vector as an integer vector so it can be ANDed.
+ SDValue IntVec = DAG.getBitcast(IntVecVT, InsertVec);
+
+ // Mask is all-ones in every lane except Lane, which is zero; AND clears it.
+ SmallVector<SDValue> MaskElts;
+ for (unsigned I = 0; I < VecVT.getVectorNumElements(); ++I) {
+ APInt Val =
+ (I == Lane) ? APInt::getZero(BitWidth) : APInt::getAllOnes(BitWidth);
+ MaskElts.push_back(DAG.getConstant(Val, DL, IntEltVT));
+ }
+ SDValue Mask = DAG.getBuildVector(IntVecVT, DL, MaskElts);
+ SDValue AndVec = DAG.getNode(ISD::AND, DL, IntVecVT, IntVec, Mask);
+
+ // Reinterpret the masked vector back as the original vector type.
+ return DAG.getBitcast(VecVT, AndVec);
+}
+
static SDValue
-performInsertVectorEltCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
+performInsertVectorEltCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
+ const AArch64Subtarget *Subtarget) {
if (SDValue Res = removeRedundantInsertVectorElt(N))
return Res;
+ if (SDValue Res = convertInsertVectorEltToAnd(N, DCI.DAG, Subtarget))
+ return Res;
+
return performPostLD1Combine(N, DCI, true);
}
@@ -26547,7 +26603,7 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
case AArch64ISD::BSP:
return performBSPExpandForSVE(N, DAG, Subtarget);
case ISD::INSERT_VECTOR_ELT:
- return performInsertVectorEltCombine(N, DCI);
+ return performInsertVectorEltCombine(N, DCI, Subtarget);
case ISD::EXTRACT_VECTOR_ELT:
return performExtractVectorEltCombine(N, DCI, Subtarget);
case ISD::VECREDUCE_ADD:
diff --git a/llvm/test/CodeGen/AArch64/insertzero.ll b/llvm/test/CodeGen/AArch64/insertzero.ll
new file mode 100644
index 0000000000000..9c9e288ee5d35
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/insertzero.ll
@@ -0,0 +1,27 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64-apple-darwin -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK-DARWIN
+; RUN: llc -mtriple=aarch64-unknown-unknown -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK
+
+define <4 x float> @insert_zero(ptr %a) {
+; CHECK-DARWIN-LABEL: insert_zero:
+; CHECK-DARWIN: ; %bb.0: ; %entry
+; CHECK-DARWIN-NEXT: Lloh0:
+; CHECK-DARWIN-NEXT: adrp x8, lCPI0_0@PAGE
+; CHECK-DARWIN-NEXT: ldr q0, [x0]
+; CHECK-DARWIN-NEXT: Lloh1:
+; CHECK-DARWIN-NEXT: ldr q1, [x8, lCPI0_0@PAGEOFF]
+; CHECK-DARWIN-NEXT: and.16b v0, v0, v1
+; CHECK-DARWIN-NEXT: ret
+; CHECK-DARWIN-NEXT: .loh AdrpLdr Lloh0, Lloh1
+;
+; CHECK-LABEL: insert_zero:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: mov v0.s[3], wzr
+; CHECK-NEXT: ret
+
+entry:
+ %vec = load <4 x float>, ptr %a, align 4
+ %vec_set_lane = insertelement <4 x float> %vec, float 0.000000e+00, i64 3
+ ret <4 x float> %vec_set_lane
+}
diff --git a/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll b/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll
index 4e2ca082e28b5..ece53c315e55e 100644
--- a/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll
+++ b/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll
@@ -350,16 +350,17 @@ define i4 @convert_to_bitmask_with_unknown_type_in_long_chain(<4 x i32> %vec1, <
; CHECK-SD-NEXT: cmeq.4s v1, v1, #0
; CHECK-SD-NEXT: adrp x8, lCPI8_0@PAGE
; CHECK-SD-NEXT: movi d2, #0x000000ffffffff
-; CHECK-SD-NEXT: movi d3, #0x00ffffffffffff
+; CHECK-SD-NEXT: movi d3, #0x00ffffffff0000
+; CHECK-SD-NEXT: movi d4, #0x00ffffffffffff
; CHECK-SD-NEXT: bic.16b v0, v1, v0
; CHECK-SD-NEXT: movi d1, #0xffff0000ffff0000
; CHECK-SD-NEXT: xtn.4h v0, v0
; CHECK-SD-NEXT: orr.8b v0, v0, v2
-; CHECK-SD-NEXT: movi d2, #0x00ffffffff0000
+; CHECK-SD-NEXT: movi d2, #0xffff0000ffffffff
; CHECK-SD-NEXT: eor.8b v1, v0, v1
-; CHECK-SD-NEXT: eor.8b v0, v0, v2
-; CHECK-SD-NEXT: mov.h v1[2], wzr
-; CHECK-SD-NEXT: orr.8b v0, v0, v3
+; CHECK-SD-NEXT: eor.8b v0, v0, v3
+; CHECK-SD-NEXT: and.8b v1, v1, v2
+; CHECK-SD-NEXT: orr.8b v0, v0, v4
; CHECK-SD-NEXT: orr.8b v0, v1, v0
; CHECK-SD-NEXT: ldr d1, [x8, lCPI8_0@PAGEOFF]
; CHECK-SD-NEXT: shl.4h v0, v0, #15
More information about the llvm-commits
mailing list