[llvm] [LLVM][CodeGen] Remove failure cases when widening EXTRACT/INSERT_SUBVECTOR. (PR #162308)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 7 08:48:44 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-aarch64
Author: Paul Walker (paulwalker-arm)
This PR implements catch-all handling for widening a scalable subvector operand (INSERT_SUBVECTOR) or result (EXTRACT_SUBVECTOR). It does this by going via the stack, using masked memory operations. With general handling in place, we can then add optimizations for specific cases.
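For orientation before reading the diff, the EXTRACT_SUBVECTOR fallback boils down to the following shape. This is a condensed sketch of the hunk in LegalizeVectorTypes.cpp below (which is authoritative), assuming the usual `DAGTypeLegalizer` context where `DAG`, `TLI`, `InVT`/`InOp` (input type/value), `VT` (original result type), `WidenVT` (widened result type), `Idx`, and `dl` are in scope:

```cpp
// Condensed sketch of the new EXTRACT_SUBVECTOR fallback; see the
// LegalizeVectorTypes.cpp hunk below for the authoritative version.
Align Alignment = DAG.getReducedAlign(InVT, /*UseABI=*/false);
SDValue StackPtr = DAG.CreateStackTemporary(InVT.getStoreSize(), Alignment);
auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
auto PtrInfo = MachinePointerInfo::getFixedStack(DAG.getMachineFunction(),
                                                 FrameIndex);
MachineMemOperand *StoreMMO = DAG.getMachineFunction().getMachineMemOperand(
    PtrInfo, MachineMemOperand::MOStore, LocationSize::beforeOrAfterPointer(),
    Alignment);
MachineMemOperand *LoadMMO = DAG.getMachineFunction().getMachineMemOperand(
    PtrInfo, MachineMemOperand::MOLoad, LocationSize::beforeOrAfterPointer(),
    Alignment);

// Spill the (legal) input vector to a stack temporary.
SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, StoreMMO);

// Mask that is true only for the lanes of the non-widened result.
SDValue Mask =
    DAG.getMaskFromElementCount(dl, WidenVT, VT.getVectorElementCount());

// Masked-load the sub-vector from its offset; inactive lanes become poison.
StackPtr = TLI.getVectorSubVecPointer(DAG, StackPtr, InVT, VT, Idx);
return DAG.getMaskedLoad(WidenVT, dl, Ch, StackPtr,
                         DAG.getUNDEF(StackPtr.getValueType()), Mask,
                         DAG.getPOISON(WidenVT), VT, LoadMMO, ISD::UNINDEXED,
                         ISD::NON_EXTLOAD);
```

The INSERT_SUBVECTOR path mirrors this: store the vector being inserted into, masked-store the sub-vector over it at the required offset, then load the result back.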
---
Patch is 41.08 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/162308.diff
5 Files Affected:
- (modified) llvm/include/llvm/CodeGen/SelectionDAG.h (+6)
- (modified) llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp (+71-19)
- (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (+8)
- (modified) llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll (+191-62)
- (modified) llvm/test/CodeGen/AArch64/sve-insert-vector.ll (+242-45)
``````````diff
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
index d9d6f0bcdcb84..f935442404cde 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -1186,6 +1186,12 @@ class SelectionDAG {
LLVM_ABI SDValue getElementCount(const SDLoc &DL, EVT VT, ElementCount EC,
bool ConstantFold = true);
+ /// Return a vector with the first 'Len' lanes set to true and remaining lanes
+ /// set to false. The mask's ValueType is the same as when comparing vectors
+ /// of type VT.
+ LLVM_ABI SDValue getMaskFromElementCount(const SDLoc &DL, EVT VT,
+ ElementCount Len);
+
/// Return a GLOBAL_OFFSET_TABLE node. This does not have a useful SDLoc.
SDValue getGLOBAL_OFFSET_TABLE(EVT VT) {
return getNode(ISD::GLOBAL_OFFSET_TABLE, SDLoc(), VT);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 87d5453cd98cf..bcaac40de5459 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -6201,8 +6201,33 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, Parts);
}
- report_fatal_error("Don't know how to widen the result of "
- "EXTRACT_SUBVECTOR for scalable vectors");
+ // Fall back to extracting through memory.
+
+ Align Alignment = DAG.getReducedAlign(InVT, /*UseABI=*/false);
+ SDValue StackPtr = DAG.CreateStackTemporary(InVT.getStoreSize(), Alignment);
+ auto &MF = DAG.getMachineFunction();
+ auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+ auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
+
+ MachineMemOperand *StoreMMO = DAG.getMachineFunction().getMachineMemOperand(
+ PtrInfo, MachineMemOperand::MOStore,
+ LocationSize::beforeOrAfterPointer(), Alignment);
+ MachineMemOperand *LoadMMO = DAG.getMachineFunction().getMachineMemOperand(
+ PtrInfo, MachineMemOperand::MOLoad,
+ LocationSize::beforeOrAfterPointer(), Alignment);
+
+ // Write out the input vector.
+ SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, StoreMMO);
+
+ // Build a mask to match the length of the non-widened result.
+ SDValue Mask =
+ DAG.getMaskFromElementCount(dl, WidenVT, VT.getVectorElementCount());
+
+ // Read back the sub-vector, setting the remaining lanes to poison.
+ StackPtr = TLI.getVectorSubVecPointer(DAG, StackPtr, InVT, VT, Idx);
+ return DAG.getMaskedLoad(
+ WidenVT, dl, Ch, StackPtr, DAG.getUNDEF(StackPtr.getValueType()), Mask,
+ DAG.getPOISON(WidenVT), VT, LoadMMO, ISD::UNINDEXED, ISD::NON_EXTLOAD);
}
// We could try widening the input to the right length but for now, extract
@@ -6306,11 +6331,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
if (VT.isVector()) {
// If all else fails replace the load with a wide masked load.
SDLoc DL(N);
- EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
-
- SDValue Len = DAG.getElementCount(DL, IdxVT, VT.getVectorElementCount());
- SDValue Mask = DAG.getNode(ISD::GET_ACTIVE_LANE_MASK, DL, WideMaskVT,
- DAG.getConstant(0, DL, IdxVT), Len);
+ SDValue Mask =
+ DAG.getMaskFromElementCount(DL, WideVT, VT.getVectorElementCount());
SDValue NewLoad = DAG.getMaskedLoad(
WideVT, DL, LD->getChain(), LD->getBasePtr(), LD->getOffset(), Mask,
@@ -7447,9 +7469,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_INSERT_SUBVECTOR(SDNode *N) {
SDValue InVec = N->getOperand(0);
EVT OrigVT = SubVec.getValueType();
- if (getTypeAction(SubVec.getValueType()) == TargetLowering::TypeWidenVector)
- SubVec = GetWidenedVector(SubVec);
-
+ SubVec = GetWidenedVector(SubVec);
EVT SubVT = SubVec.getValueType();
// Whether or not all the elements of the widened SubVec will be inserted into
@@ -7471,17 +7491,52 @@ SDValue DAGTypeLegalizer::WidenVecOp_INSERT_SUBVECTOR(SDNode *N) {
}
}
+ if (!IndicesValid)
+ report_fatal_error(
+ "Don't know how to widen the operands for INSERT_SUBVECTOR");
+
SDLoc DL(N);
// We need to make sure that the indices are still valid, otherwise we might
// widen what was previously well-defined to something undefined.
- if (IndicesValid && InVec.isUndef() && N->getConstantOperandVal(2) == 0)
+ if (InVec.isUndef() && N->getConstantOperandVal(2) == 0)
return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, InVec, SubVec,
N->getOperand(2));
- if (!IndicesValid || OrigVT.isScalableVector())
- report_fatal_error(
- "Don't know how to widen the operands for INSERT_SUBVECTOR");
+ if (OrigVT.isScalableVector()) {
+ // Fall back to inserting through memory.
+
+ Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false);
+ SDValue StackPtr = DAG.CreateStackTemporary(VT.getStoreSize(), Alignment);
+ auto &MF = DAG.getMachineFunction();
+ auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+ auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
+
+ MachineMemOperand *StoreMMO = DAG.getMachineFunction().getMachineMemOperand(
+ PtrInfo, MachineMemOperand::MOStore,
+ LocationSize::beforeOrAfterPointer(), Alignment);
+ MachineMemOperand *LoadMMO = DAG.getMachineFunction().getMachineMemOperand(
+ PtrInfo, MachineMemOperand::MOLoad,
+ LocationSize::beforeOrAfterPointer(), Alignment);
+
+ // Write out the vector being inserted into.
+ SDValue Ch =
+ DAG.getStore(DAG.getEntryNode(), DL, InVec, StackPtr, StoreMMO);
+
+ // Build a mask to match the length of the sub-vector.
+ SDValue Mask =
+ DAG.getMaskFromElementCount(DL, SubVT, OrigVT.getVectorElementCount());
+
+ // Overwrite the sub-vector at the required offset.
+ StackPtr =
+ TLI.getVectorSubVecPointer(DAG, StackPtr, VT, OrigVT, N->getOperand(2));
+ Ch = DAG.getMaskedStore(Ch, DL, SubVec, StackPtr,
+ DAG.getUNDEF(StackPtr.getValueType()), Mask, VT,
+ StoreMMO, ISD::UNINDEXED, ISD::NON_EXTLOAD);
+
+ // Read back the result.
+ return DAG.getLoad(VT, DL, Ch, StackPtr, LoadMMO);
+ }
// If the operands can't be widened legally, just replace the INSERT_SUBVECTOR
// with a series of INSERT_VECTOR_ELT
@@ -7560,12 +7615,9 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
if (StVT.isVector()) {
// If all else fails replace the store with a wide masked store.
SDLoc DL(N);
- EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
-
SDValue WideStVal = GetWidenedVector(StVal);
- SDValue Len = DAG.getElementCount(DL, IdxVT, StVT.getVectorElementCount());
- SDValue Mask = DAG.getNode(ISD::GET_ACTIVE_LANE_MASK, DL, WideMaskVT,
- DAG.getConstant(0, DL, IdxVT), Len);
+ SDValue Mask =
+ DAG.getMaskFromElementCount(DL, WideVT, StVT.getVectorElementCount());
return DAG.getMaskedStore(ST->getChain(), DL, WideStVal, ST->getBasePtr(),
ST->getOffset(), Mask, ST->getMemoryVT(),
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 95f53fe0bfdba..d976c0ce1b901 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2112,6 +2112,14 @@ SDValue SelectionDAG::getElementCount(const SDLoc &DL, EVT VT, ElementCount EC,
return getConstant(EC.getKnownMinValue(), DL, VT);
}
+SDValue SelectionDAG::getMaskFromElementCount(const SDLoc &DL, EVT DataVT,
+ ElementCount EC) {
+ EVT IdxVT = TLI->getVectorIdxTy(getDataLayout());
+ EVT MaskVT = TLI->getSetCCResultType(getDataLayout(), *getContext(), DataVT);
+ return getNode(ISD::GET_ACTIVE_LANE_MASK, DL, MaskVT,
+ getConstant(0, DL, IdxVT), getElementCount(DL, IdxVT, EC));
+}
+
SDValue SelectionDAG::getStepVector(const SDLoc &DL, EVT ResVT) {
APInt One(ResVT.getScalarSizeInBits(), 1);
return getStepVector(DL, ResVT, One);
diff --git a/llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll b/llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll
index 4aaa25e5e66c5..8d0c71502f1e3 100644
--- a/llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll
@@ -3,12 +3,199 @@
; Extracting illegal subvectors
-define <vscale x 1 x i32> @extract_nxv1i32_nxv4i32(<vscale x 4 x i32> %vec) nounwind {
-; CHECK-LABEL: extract_nxv1i32_nxv4i32:
+; NOTE: Insert sub-vector into a legal type to avoid relying on an undefined
+; calling convention.
+define <vscale x 4 x i32> @extract_nxv1i32_nxv4i32_0(<vscale x 4 x i32> %vec) nounwind {
+; CHECK-LABEL: extract_nxv1i32_nxv4i32_0:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
- %retval = call <vscale x 1 x i32> @llvm.vector.extract.nxv1i32.nxv4i32(<vscale x 4 x i32> %vec, i64 0)
- ret <vscale x 1 x i32> %retval
+ %e = call <vscale x 1 x i32> @llvm.vector.extract.nxv1i32.nxv4i32(<vscale x 4 x i32> %vec, i64 0)
+ %retval = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.nxv1i32(<vscale x 4 x i32> poison, <vscale x 1 x i32> %e, i64 0)
+ ret <vscale x 4 x i32> %retval
+}
+
+; NOTE: Insert sub-vector into a legal type to avoid relying on an undefined
+; calling convention.
+define <vscale x 4 x i32> @extract_nxv1i32_nxv4i32_1(<vscale x 4 x i32> %vec) nounwind {
+; CHECK-LABEL: extract_nxv1i32_nxv4i32_1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov x9, sp
+; CHECK-NEXT: str z0, [sp]
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: whilelo p0.s, xzr, x8
+; CHECK-NEXT: cntw x8
+; CHECK-NEXT: add x8, x9, x8
+; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8]
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %e = call <vscale x 1 x i32> @llvm.vector.extract.nxv1i32.nxv4i32(<vscale x 4 x i32> %vec, i64 1)
+ %retval = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.nxv1i32(<vscale x 4 x i32> poison, <vscale x 1 x i32> %e, i64 0)
+ ret <vscale x 4 x i32> %retval
+}
+
+; NOTE: Insert sub-vector into a legal type to avoid relying on an undefined
+; calling convention.
+define <vscale x 4 x i32> @extract_nxv1i32_nxv4i32_2(<vscale x 4 x i32> %vec) nounwind {
+; CHECK-LABEL: extract_nxv1i32_nxv4i32_2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov x9, sp
+; CHECK-NEXT: str z0, [sp]
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: whilelo p0.s, xzr, x8
+; CHECK-NEXT: cnth x8
+; CHECK-NEXT: add x8, x9, x8
+; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8]
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %e = call <vscale x 1 x i32> @llvm.vector.extract.nxv1i32.nxv4i32(<vscale x 4 x i32> %vec, i64 2)
+ %retval = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.nxv1i32(<vscale x 4 x i32> poison, <vscale x 1 x i32> %e, i64 0)
+ ret <vscale x 4 x i32> %retval
+}
+
+; NOTE: Insert sub-vector into a legal type to avoid relying on an undefined
+; calling convention.
+define <vscale x 4 x i32> @extract_nxv1i32_nxv4i32_3(<vscale x 4 x i32> %vec) nounwind {
+; CHECK-LABEL: extract_nxv1i32_nxv4i32_3:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov x9, sp
+; CHECK-NEXT: str z0, [sp]
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: whilelo p0.s, xzr, x8
+; CHECK-NEXT: cntw x8, all, mul #3
+; CHECK-NEXT: add x8, x9, x8
+; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8]
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %e = call <vscale x 1 x i32> @llvm.vector.extract.nxv1i32.nxv4i32(<vscale x 4 x i32> %vec, i64 3)
+ %retval = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.nxv1i32(<vscale x 4 x i32> poison, <vscale x 1 x i32> %e, i64 0)
+ ret <vscale x 4 x i32> %retval
+}
+
+; NOTE: Insert sub-vector into a legal type to avoid relying on an undefined
+; calling convention.
+define <vscale x 2 x float> @extract_nxv1f32_nxv2f32_0(<vscale x 2 x float> %vec) nounwind {
+; CHECK-LABEL: extract_nxv1f32_nxv2f32_0:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ret
+ %e = call <vscale x 1 x float> @llvm.vector.extract.nxv1f32.nxv2f32(<vscale x 2 x float> %vec, i64 0)
+ %retval = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.nxv1f32(<vscale x 2 x float> poison, <vscale x 1 x float> %e, i64 0)
+ ret <vscale x 2 x float> %retval
+}
+
+; NOTE: Insert sub-vector into a legal type to avoid relying on an undefined
+; calling convention.
+define <vscale x 2 x float> @extract_nxv1f32_nxv2f32_1(<vscale x 2 x float> %vec) nounwind {
+; CHECK-LABEL: extract_nxv1f32_nxv2f32_1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: addpl x9, sp, #4
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: st1w { z0.d }, p0, [sp, #1, mul vl]
+; CHECK-NEXT: whilelo p1.d, xzr, x8
+; CHECK-NEXT: cntw x8
+; CHECK-NEXT: add x8, x9, x8
+; CHECK-NEXT: ld1w { z0.d }, p1/z, [x8]
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %e = call <vscale x 1 x float> @llvm.vector.extract.nxv1f32.nxv2f32(<vscale x 2 x float> %vec, i64 1)
+ %retval = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.nxv1f32(<vscale x 2 x float> poison, <vscale x 1 x float> %e, i64 0)
+ ret <vscale x 2 x float> %retval
+}
+
+; NOTE: Insert sub-vector into a legal type to avoid relying on an undefined
+; calling convention.
+define <vscale x 4 x float> @extract_nxv1f32_nxv4f32_0(<vscale x 4 x float> %vec) nounwind {
+; CHECK-LABEL: extract_nxv1f32_nxv4f32_0:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ret
+ %e = call <vscale x 1 x float> @llvm.vector.extract.nxv1f32.nxv4f32(<vscale x 4 x float> %vec, i64 0)
+ %retval = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.nxv1f32(<vscale x 4 x float> poison, <vscale x 1 x float> %e, i64 0)
+ ret <vscale x 4 x float> %retval
+}
+
+; NOTE: Insert sub-vector into a legal type to avoid relying on an undefined
+; calling convention.
+define <vscale x 4 x float> @extract_nxv1f32_nxv4f32_1(<vscale x 4 x float> %vec) nounwind {
+; CHECK-LABEL: extract_nxv1f32_nxv4f32_1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov x9, sp
+; CHECK-NEXT: str z0, [sp]
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: whilelo p0.d, xzr, x8
+; CHECK-NEXT: cntw x8
+; CHECK-NEXT: add x8, x9, x8
+; CHECK-NEXT: ld1w { z0.d }, p0/z, [x8]
+; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %e = call <vscale x 1 x float> @llvm.vector.extract.nxv1f32.nxv4f32(<vscale x 4 x float> %vec, i64 1)
+ %retval = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.nxv1f32(<vscale x 4 x float> poison, <vscale x 1 x float> %e, i64 0)
+ ret <vscale x 4 x float> %retval
+}
+
+; NOTE: Insert sub-vector into a legal type to avoid relying on an undefined
+; calling convention.
+define <vscale x 4 x float> @extract_nxv1f32_nxv4f32_2(<vscale x 4 x float> %vec) nounwind {
+; CHECK-LABEL: extract_nxv1f32_nxv4f32_2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: str z0, [sp]
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: whilelo p0.d, xzr, x8
+; CHECK-NEXT: ld1w { z0.d }, p0/z, [sp, #1, mul vl]
+; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %e = call <vscale x 1 x float> @llvm.vector.extract.nxv1f32.nxv4f32(<vscale x 4 x float> %vec, i64 2)
+ %retval = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.nxv1f32(<vscale x 4 x float> poison, <vscale x 1 x float> %e, i64 0)
+ ret <vscale x 4 x float> %retval
+}
+
+; NOTE: Insert sub-vector into a legal type to avoid relying on an undefined
+; calling convention.
+define <vscale x 4 x float> @extract_nxv1f32_nxv4f32_3(<vscale x 4 x float> %vec) nounwind {
+; CHECK-LABEL: extract_nxv1f32_nxv4f32_3:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov x9, sp
+; CHECK-NEXT: str z0, [sp]
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: whilelo p0.d, xzr, x8
+; CHECK-NEXT: cntw x8, all, mul #3
+; CHECK-NEXT: add x8, x9, x8
+; CHECK-NEXT: ld1w { z0.d }, p0/z, [x8]
+; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %e = call <vscale x 1 x float> @llvm.vector.extract.nxv1f32.nxv4f32(<vscale x 4 x float> %vec, i64 3)
+ %retval = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.nxv1f32(<vscale x 4 x float> poison, <vscale x 1 x float> %e, i64 0)
+ ret <vscale x 4 x float> %retval
}
define <vscale x 1 x i16> @extract_nxv1i16_nxv6i16(<vscale x 6 x i16> %vec) nounwind {
@@ -19,9 +206,6 @@ define <vscale x 1 x i16> @extract_nxv1i16_nxv6i16(<vscale x 6 x i16> %vec) noun
ret <vscale x 1 x i16> %retval
}
-declare <vscale x 1 x i32> @llvm.vector.extract.nxv1i32.nxv4i32(<vscale x 4 x i32>, i64)
-declare <vscale x 1 x i16> @llvm.vector.extract.nxv1i16.nxv6i16(<vscale x 6 x i16>, i64)
-
;
; Extract half i1 vector that needs promotion from legal type.
;
@@ -43,8 +227,6 @@ define <vscale x 8 x i1> @extract_nxv8i1_nxv16i1_8(<vscale x 16 x i1> %in) {
ret <vscale x 8 x i1> %res
}
-declare <vscale x 8 x i1> @llvm.vector.extract.nxv8i1.nxv16i1(<vscale x 16 x i1>, i64)
-
;
; Extract i1 vector that needs widening from one that needs widening.
;
@@ -99,8 +281,6 @@ define <vscale x 14 x i1> @extract_nxv14i1_nxv28i1_14(<vscale x 28 x i1> %in) uw
ret <vscale x 14 x i1> %res
}
-declare <vscale x 14 x i1> @llvm.vector.extract.nxv14i1.nxv28i1(<vscale x 28 x i1>, i64)
-
;
; Extract half i1 vector that needs promotion from one that needs splitting.
;
@@ -140,8 +320,6 @@ define <vscale x 8 x i1> @extract_nxv8i1_nxv32i1_24(<vscale x 32 x i1> %in) {
ret <vscale x 8 x i1> %res
}
-declare <vscale x 8 x i1> @llvm.vector.extract.nxv8i1.nxv32i1(<vscale x 32 x i1>, i64)
-
;
; Extract 1/4th i1 vector that needs promotion from legal type.
;
@@ -185,8 +363,6 @@ define <vscale x 4 x i1> @extract_nxv4i1_nxv16i1_12(<vscale x 16 x i1> %in) {
ret <vscale x 4 x i1> %res
}
-declare <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1>, i64)
-
;
; Extract 1/8th i1 vector that needs promotion from legal type.
;
@@ -278,8 +454,6 @@ define <vscale x 2 x i1> @extract_nxv2i1_nxv16i1_14(<vscale x 16 x i1> %in) {
ret <vscale x 2 x i1> %res
}
-declare <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv16i1(<vscale x 16 x i1>, i64)
-
;
; Extract i1 vector that needs promotion from one that needs widening.
;
@@ -313,8 +487,6 @@ define <vscale x 4 x i1> @extract_nxv4i1_nxv12i1_8(<vscale x 12 x i1> %in) {
ret <vscale x 4 x i1> %res
}
-declare <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv12i1(<vscale x 12 x i1>, i64)
-
;
; Extract 1/8th i8 vector that needs promotion from legal type.
;
@@ -406,8 +578,6 @@ define <vscale x 2 x i8> @extract_nxv2i8_nxv16i8_14(<vscale x 16 x i8> %in) {
ret <vscale x 2 x i8> %res
}
-declare <vscale x 2 x i8> @llvm.vector.extract.nxv2i8.nxv16i8(<vscale x 16 x i8>, i64)
-
;
; Extract i8 vector that...
[truncated]
``````````
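The repeated GET_ACTIVE_LANE_MASK construction in the masked load/store widening paths is factored into the new `SelectionDAG::getMaskFromElementCount` helper, so callers reduce to a single call, e.g. (shape as in the WidenVecRes_LOAD hunk above):

```cpp
// First VT.getVectorElementCount() lanes true, remainder false; the mask's
// type is WideVT's setcc result type.
SDValue Mask =
    DAG.getMaskFromElementCount(DL, WideVT, VT.getVectorElementCount());
```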
https://github.com/llvm/llvm-project/pull/162308
More information about the llvm-commits mailing list