[llvm] [AArch64] Fold scalar-to-vector shuffles into DUP/FMOV (PR #166962)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 20 07:59:49 PST 2025
================
@@ -15654,6 +15654,56 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
}
}
+ // 128-bit NEON integer vectors:
+ // If BUILD_VECTOR has low half == splat(lane 0) and high half == zero,
+ // build the low half and return SUBREG_TO_REG(0, Lo, dsub).
+ // This avoids INSERT_VECTOR_ELT chains and lets later passes assume the
+ // other lanes are zero.
+ if (VT.isFixedLengthVector() && VT.getSizeInBits() == 128) {
+ EVT LaneVT = VT.getVectorElementType();
+ if (LaneVT.isInteger()) {
+ const unsigned HalfElts = NumElts >> 1;
+ SDValue FirstVal = Op.getOperand(0);
+
+ auto IsZero = [&](SDValue V) { return isNullConstant(V); };
+
+ bool IsLoSplatHiZero = true;
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue Vi = Op.getOperand(i);
+ bool violates = (i < HalfElts) ? (Vi != FirstVal) : !IsZero(Vi);
+ if (violates) {
+ IsLoSplatHiZero = false;
+ break;
+ }
+ }
+
+ if (IsLoSplatHiZero) {
+ EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
+ unsigned LaneBits = LaneVT.getSizeInBits();
+
+ auto buildSubregToReg = [&](SDValue LoHalf) -> SDValue {
+ SDValue ZeroImm = DAG.getTargetConstant(0, DL, MVT::i32);
+ SDValue SubIdx = DAG.getTargetConstant(AArch64::dsub, DL, MVT::i32);
+ SDNode *N = DAG.getMachineNode(TargetOpcode::SUBREG_TO_REG, DL, VT,
----------------
Lukacma wrote:
Yeah, we tried different nodes here, but we always ended up with redundant moves in some cases. That's why we went with SUBREG_TO_REG. We could create a new ISD node to use here to address the layering concern, if you would like.
https://github.com/llvm/llvm-project/pull/166962
More information about the llvm-commits
mailing list