[llvm] [SDAG] Disable illegal extract_subvector splitting for scalable vectors (PR #170315)
Benjamin Maxwell via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 2 08:28:33 PST 2025
https://github.com/MacDue updated https://github.com/llvm/llvm-project/pull/170315
>From 2815ee55ebe53fa8116aee95b208e5f5178f1565 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Tue, 2 Dec 2025 15:31:49 +0000
Subject: [PATCH 1/3] Precommit test
Change-Id: If6082164fd0decd4933a4be3b6a2e40b93160316
---
.../sve-fixed-vector-extract-256-bits.ll | 41 +++++++++++++++++++
1 file changed, 41 insertions(+)
create mode 100644 llvm/test/CodeGen/AArch64/sve-fixed-vector-extract-256-bits.ll
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-vector-extract-256-bits.ll b/llvm/test/CodeGen/AArch64/sve-fixed-vector-extract-256-bits.ll
new file mode 100644
index 0000000000000..93d573ba829b7
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-vector-extract-256-bits.ll
@@ -0,0 +1,41 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -aarch64-sve-vector-bits-min=256 -aarch64-sve-vector-bits-max=256 < %s -o - | FileCheck %s
+
+; Note: This test case is reduced from: https://github.com/llvm/llvm-project/pull/166748#issuecomment-3600498185
+
+define i32 @test_extract_v8i32_from_nxv8i32(<vscale x 8 x i32> %vec) {
+; CHECK-LABEL: test_extract_v8i32_from_nxv8i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT: sub x9, sp, #48
+; CHECK-NEXT: mov x29, sp
+; CHECK-NEXT: and sp, x9, #0xffffffffffffffe0
+; CHECK-NEXT: .cfi_def_cfa w29, 16
+; CHECK-NEXT: .cfi_offset w30, -8
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: mov w9, v1.s[2]
+; CHECK-NEXT: mov w10, v1.s[1]
+; CHECK-NEXT: mov x8, sp
+; CHECK-NEXT: mov w11, v0.s[2]
+; CHECK-NEXT: mov z2.s, z1.s[3]
+; CHECK-NEXT: mov z3.s, z0.s[3]
+; CHECK-NEXT: fmov w12, s1
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: stp w10, w9, [sp, #20]
+; CHECK-NEXT: mov w9, v0.s[1]
+; CHECK-NEXT: fmov w10, s0
+; CHECK-NEXT: str s2, [sp, #28]
+; CHECK-NEXT: str s3, [sp, #12]
+; CHECK-NEXT: str w12, [sp, #16]
+; CHECK-NEXT: stp w9, w11, [sp, #4]
+; CHECK-NEXT: str w10, [sp]
+; CHECK-NEXT: ldr z0, [x8]
+; CHECK-NEXT: uaddv d0, p0, z0.s
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: mov sp, x29
+; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+ %1 = tail call <8 x i32> @llvm.vector.extract.v8i32.nxv8i32(<vscale x 8 x i32> %vec, i64 0)
+ %2 = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %1)
+ ret i32 %2
+}
>From b577028becd93e5afa0c6c5ced5fb30fd0d111b8 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Tue, 2 Dec 2025 15:33:30 +0000
Subject: [PATCH 2/3] [SDAG] Disable illegal extract_subvector splitting for
scalable vectors
The "half spanning" legalization of extract_subvector is only valid for
fixed-length vectors. This patch disables it for scalable vectors and makes
more careful use of ElementCount in the lowering.
Fixes regression from #154101, which was encountered here:
https://github.com/llvm/llvm-project/pull/166748#issuecomment-3600498185
Note: We could optimize this case given the known vscale, but this patch
only attempts to fix the miscompile.
Change-Id: Ic580ae0ba95233c8b3dcf6b766760c1cf1533d52
---
.../SelectionDAG/LegalizeVectorTypes.cpp | 36 +++++++++++--------
.../sve-fixed-vector-extract-256-bits.ll | 32 +++++------------
2 files changed, 30 insertions(+), 38 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 83796f20731ac..7bcbbc461b690 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -3938,34 +3938,42 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
GetSplitVector(N->getOperand(0), Lo, Hi);
- uint64_t LoEltsMin = Lo.getValueType().getVectorMinNumElements();
- uint64_t IdxVal = Idx->getAsZExtVal();
+ ElementCount LoElts = Lo.getValueType().getVectorElementCount();
+ ElementCount IdxVal =
+ ElementCount::get(Idx->getAsZExtVal(), SubVT.isScalableVector());
+ uint64_t IdxValMin = IdxVal.getKnownMinValue();
- unsigned NumResultElts = SubVT.getVectorMinNumElements();
+ EVT SrcVT = N->getOperand(0).getValueType();
+ ElementCount NumResultElts = SubVT.getVectorElementCount();
- if (IdxVal < LoEltsMin) {
- // If the extracted elements are all in the low half, do a simple extract.
- if (IdxVal + NumResultElts <= LoEltsMin)
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Lo, Idx);
+ // If the extracted elements are all in the low half, do a simple extract.
+ if (ElementCount::isKnownLE(IdxVal + NumResultElts, LoElts))
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Lo, Idx);
+ unsigned LoEltsMin = LoElts.getKnownMinValue();
+ if (IdxValMin < LoEltsMin &&
+ !(SubVT.isScalableVector() || SrcVT.isScalableVector())) {
// Extracted subvector crosses vector split, so we need to blend the two
// halves.
// TODO: May be able to emit partial extract_subvector.
SmallVector<SDValue, 8> Elts;
- Elts.reserve(NumResultElts);
+ Elts.reserve(NumResultElts.getFixedValue());
- DAG.ExtractVectorElements(Lo, Elts, /*Start=*/IdxVal,
- /*Count=*/LoEltsMin - IdxVal);
+ // This is not valid for scalable vectors. If SubVT is scalable, this is the
+ // same as unrolling a scalable dimension (invalid). If SrcVT is scalable,
+ // `Lo[LoEltsMin - 1]` may not be the last element of `Lo`.
+ DAG.ExtractVectorElements(Lo, Elts, /*Start=*/IdxValMin,
+ /*Count=*/LoEltsMin - IdxValMin);
DAG.ExtractVectorElements(Hi, Elts, /*Start=*/0,
/*Count=*/SubVT.getVectorNumElements() -
Elts.size());
return DAG.getBuildVector(SubVT, dl, Elts);
}
- EVT SrcVT = N->getOperand(0).getValueType();
if (SubVT.isScalableVector() == SrcVT.isScalableVector()) {
- uint64_t ExtractIdx = IdxVal - LoEltsMin;
- if (ExtractIdx % NumResultElts == 0)
+ uint64_t ExtractIdx = IdxValMin - LoEltsMin;
+ unsigned NumResultEltsMin = NumResultElts.getKnownMinValue();
+ if (ExtractIdx % NumResultEltsMin == 0)
return DAG.getExtractSubvector(dl, SubVT, Hi, ExtractIdx);
// We cannot create an extract_subvector that isn't a multiple of the result
@@ -3973,7 +3981,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
// desired elements down to 0 and do a simple 0 extract.
EVT HiVT = Hi.getValueType();
SmallVector<int, 8> Mask(HiVT.getVectorNumElements(), -1);
- for (int I = 0; I != static_cast<int>(NumResultElts); ++I)
+ for (int I = 0; I != static_cast<int>(NumResultEltsMin); ++I)
Mask[I] = ExtractIdx + I;
SDValue Shuffle =
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-vector-extract-256-bits.ll b/llvm/test/CodeGen/AArch64/sve-fixed-vector-extract-256-bits.ll
index 93d573ba829b7..6cf0774a4269a 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-vector-extract-256-bits.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-vector-extract-256-bits.ll
@@ -6,34 +6,18 @@
define i32 @test_extract_v8i32_from_nxv8i32(<vscale x 8 x i32> %vec) {
; CHECK-LABEL: test_extract_v8i32_from_nxv8i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
-; CHECK-NEXT: sub x9, sp, #48
-; CHECK-NEXT: mov x29, sp
-; CHECK-NEXT: and sp, x9, #0xffffffffffffffe0
-; CHECK-NEXT: .cfi_def_cfa w29, 16
-; CHECK-NEXT: .cfi_offset w30, -8
+; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 16 + 16 * VG
; CHECK-NEXT: .cfi_offset w29, -16
-; CHECK-NEXT: mov w9, v1.s[2]
-; CHECK-NEXT: mov w10, v1.s[1]
-; CHECK-NEXT: mov x8, sp
-; CHECK-NEXT: mov w11, v0.s[2]
-; CHECK-NEXT: mov z2.s, z1.s[3]
-; CHECK-NEXT: mov z3.s, z0.s[3]
-; CHECK-NEXT: fmov w12, s1
+; CHECK-NEXT: str z0, [sp]
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: stp w10, w9, [sp, #20]
-; CHECK-NEXT: mov w9, v0.s[1]
-; CHECK-NEXT: fmov w10, s0
-; CHECK-NEXT: str s2, [sp, #28]
-; CHECK-NEXT: str s3, [sp, #12]
-; CHECK-NEXT: str w12, [sp, #16]
-; CHECK-NEXT: stp w9, w11, [sp, #4]
-; CHECK-NEXT: str w10, [sp]
-; CHECK-NEXT: ldr z0, [x8]
+; CHECK-NEXT: ldr z0, [sp]
+; CHECK-NEXT: str z1, [sp, #1, mul vl]
; CHECK-NEXT: uaddv d0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
-; CHECK-NEXT: mov sp, x29
-; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT: addvl sp, sp, #2
+; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%1 = tail call <8 x i32> @llvm.vector.extract.v8i32.nxv8i32(<vscale x 8 x i32> %vec, i64 0)
%2 = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %1)
>From bc04bc25aebedf7ad809fcd29a841e5aef5136ec Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Tue, 2 Dec 2025 16:27:03 +0000
Subject: [PATCH 3/3] Fixups
Change-Id: I9383287eb01dd8404a9563b8beca098f4603b62b
---
.../SelectionDAG/LegalizeVectorTypes.cpp | 23 +++++++++++--------
.../sve-fixed-vector-extract-256-bits.ll | 4 +---
2 files changed, 14 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 7bcbbc461b690..a07ce28bc7139 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -3971,18 +3971,21 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
}
if (SubVT.isScalableVector() == SrcVT.isScalableVector()) {
- uint64_t ExtractIdx = IdxValMin - LoEltsMin;
- unsigned NumResultEltsMin = NumResultElts.getKnownMinValue();
- if (ExtractIdx % NumResultEltsMin == 0)
- return DAG.getExtractSubvector(dl, SubVT, Hi, ExtractIdx);
-
- // We cannot create an extract_subvector that isn't a multiple of the result
- // size, which may go out of bounds for the last elements. Shuffle the
- // desired elements down to 0 and do a simple 0 extract.
+ ElementCount ExtractIdx = IdxVal - LoElts;
+ if (ExtractIdx.isKnownMultipleOf(NumResultElts))
+ return DAG.getExtractSubvector(dl, SubVT, Hi,
+ ExtractIdx.getKnownMinValue());
+
EVT HiVT = Hi.getValueType();
+ assert(HiVT.isFixedLengthVector() &&
+ "Only fixed-vector extracts are supported in this case");
+
+ // We cannot create an extract_subvector that isn't a multiple of the
+ // result size, which may go out of bounds for the last elements. Shuffle
+ // the desired elements down to 0 and do a simple 0 extract.
SmallVector<int, 8> Mask(HiVT.getVectorNumElements(), -1);
- for (int I = 0; I != static_cast<int>(NumResultEltsMin); ++I)
- Mask[I] = ExtractIdx + I;
+ for (int I = 0; I != int(NumResultElts.getFixedValue()); ++I)
+ Mask[I] = int(ExtractIdx.getFixedValue()) + I;
SDValue Shuffle =
DAG.getVectorShuffle(HiVT, dl, Hi, DAG.getPOISON(HiVT), Mask);
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-vector-extract-256-bits.ll b/llvm/test/CodeGen/AArch64/sve-fixed-vector-extract-256-bits.ll
index 6cf0774a4269a..71c9a941807a4 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-vector-extract-256-bits.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-vector-extract-256-bits.ll
@@ -3,13 +3,11 @@
; Note: This test case is reduced from: https://github.com/llvm/llvm-project/pull/166748#issuecomment-3600498185
-define i32 @test_extract_v8i32_from_nxv8i32(<vscale x 8 x i32> %vec) {
+define i32 @test_extract_v8i32_from_nxv8i32(<vscale x 8 x i32> %vec) nounwind {
; CHECK-LABEL: test_extract_v8i32_from_nxv8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2
-; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 16 + 16 * VG
-; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: str z0, [sp]
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ldr z0, [sp]
More information about the llvm-commits
mailing list