[llvm] aaa37af - [LLVM][CodeGen][SVE] Add lowering for ISD::[ANY,SIGN,ZERO]_EXTEND_VECTOR_INREG. (#169847)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 2 03:31:57 PST 2025
Author: Paul Walker
Date: 2025-12-02T11:31:52Z
New Revision: aaa37afbc283aef885afc779dcb1539a3b3775e6
URL: https://github.com/llvm/llvm-project/commit/aaa37afbc283aef885afc779dcb1539a3b3775e6
DIFF: https://github.com/llvm/llvm-project/commit/aaa37afbc283aef885afc779dcb1539a3b3775e6.diff
LOG: [LLVM][CodeGen][SVE] Add lowering for ISD::[ANY,SIGN,ZERO]_EXTEND_VECTOR_INREG. (#169847)
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.h
llvm/test/CodeGen/AArch64/sve-sext-zext.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 2ce8f6d924a78..dceb36c772cb3 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1590,6 +1590,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::AVGCEILS, VT, Custom);
setOperationAction(ISD::AVGCEILU, VT, Custom);
+ setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG, VT, Custom);
+ setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
+ setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
+
if (!Subtarget->isLittleEndian())
setOperationAction(ISD::BITCAST, VT, Custom);
@@ -7858,6 +7862,9 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
return LowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::BUILD_VECTOR:
return LowerBUILD_VECTOR(Op, DAG);
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ return LowerEXTEND_VECTOR_INREG(Op, DAG);
case ISD::ZERO_EXTEND_VECTOR_INREG:
return LowerZERO_EXTEND_VECTOR_INREG(Op, DAG);
case ISD::VECTOR_SHUFFLE:
@@ -14690,6 +14697,40 @@ static SDValue tryToConvertShuffleOfTbl2ToTbl4(SDValue Op,
Tbl2->getOperand(1), Tbl2->getOperand(2), TBLMask});
}
+SDValue
+AArch64TargetLowering::LowerEXTEND_VECTOR_INREG(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+ assert(VT.isScalableVector() && "Unexpected result type!");
+ // Lowers a scalable-vector *_EXTEND_VECTOR_INREG node to one SUNPKLO/UUNPKLO node per doubling of the element width.
+ bool Signed = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
+ unsigned UnpackOpcode = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO; // any- and zero-extend both take the unsigned unpack
+
+ // Repeatedly unpack Val until the result is of the desired type. The switch is entered at the operand's type and each case widens one step before falling through.
+ SDValue Val = Op.getOperand(0);
+ switch (Val.getSimpleValueType().SimpleTy) {
+ default:
+ return SDValue(); // operand type not handled here; NOTE(review): presumably the caller then falls back to generic legalization - confirm
+ case MVT::nxv16i8:
+ Val = DAG.getNode(UnpackOpcode, DL, MVT::nxv8i16, Val);
+ if (VT == MVT::nxv8i16)
+ break;
+ [[fallthrough]];
+ case MVT::nxv8i16:
+ Val = DAG.getNode(UnpackOpcode, DL, MVT::nxv4i32, Val);
+ if (VT == MVT::nxv4i32)
+ break;
+ [[fallthrough]];
+ case MVT::nxv4i32:
+ Val = DAG.getNode(UnpackOpcode, DL, MVT::nxv2i64, Val);
+ assert(VT == MVT::nxv2i64 && "Unexpected result type!"); // nxv2i64 is the widest result this chain produces
+ break;
+ }
+
+ return Val;
+}
+
// Baseline legalization for ZERO_EXTEND_VECTOR_INREG will blend-in zeros,
// but we don't have an appropriate instruction,
// so custom-lower it as ZIP1-with-zeros.
@@ -14698,6 +14739,10 @@ AArch64TargetLowering::LowerZERO_EXTEND_VECTOR_INREG(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
EVT VT = Op.getValueType();
+
+ if (VT.isScalableVector())
+ return LowerEXTEND_VECTOR_INREG(Op, DAG);
+
SDValue SrcOp = Op.getOperand(0);
EVT SrcVT = SrcOp.getValueType();
assert(VT.getScalarSizeInBits() % SrcVT.getScalarSizeInBits() == 0 &&
@@ -28876,7 +28921,8 @@ void AArch64TargetLowering::ReplaceExtractSubVectorResults(
if ((Index != 0) && (Index != ResEC.getKnownMinValue()))
return;
- unsigned Opcode = (Index == 0) ? AArch64ISD::UUNPKLO : AArch64ISD::UUNPKHI;
+ unsigned Opcode = (Index == 0) ? (unsigned)ISD::ANY_EXTEND_VECTOR_INREG
+ : (unsigned)AArch64ISD::UUNPKHI;
EVT ExtendedHalfVT = VT.widenIntegerVectorElementType(*DAG.getContext());
SDValue Half = DAG.getNode(Opcode, DL, ExtendedHalfVT, N->getOperand(0));
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 32aa913181a21..b6d34f97c7b48 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -714,6 +714,7 @@ class AArch64TargetLowering : public TargetLowering {
SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/test/CodeGen/AArch64/sve-sext-zext.ll b/llvm/test/CodeGen/AArch64/sve-sext-zext.ll
index 88e13ea1e0fa4..845628a91498b 100644
--- a/llvm/test/CodeGen/AArch64/sve-sext-zext.ll
+++ b/llvm/test/CodeGen/AArch64/sve-sext-zext.ll
@@ -456,3 +456,131 @@ define <vscale x 2 x i64> @zext_i18_i64(<vscale x 2 x i18> %a) {
%r = zext <vscale x 2 x i18> %a to <vscale x 2 x i64>
ret <vscale x 2 x i64> %r
}
+
+define <vscale x 8 x i16> @sext_inreg_i16_from_i8(<vscale x 16 x i8> %a) {
+; CHECK-LABEL: sext_inreg_i16_from_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sunpklo z0.h, z0.b
+; CHECK-NEXT: ret
+ %subvec = call <vscale x 8 x i8> @llvm.vector.extract.nxv8i8.nxv16i8(<vscale x 16 x i8> %a, i64 0) ; subvector starting at index 0
+ %sext = sext <vscale x 8 x i8> %subvec to <vscale x 8 x i16> ; i8 -> i16: expected to become a single sunpklo
+ ret <vscale x 8 x i16> %sext
+}
+
+define <vscale x 4 x i32> @sext_inreg_i32_from_i8(<vscale x 16 x i8> %a) {
+; CHECK-LABEL: sext_inreg_i32_from_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sunpklo z0.h, z0.b
+; CHECK-NEXT: sunpklo z0.s, z0.h
+; CHECK-NEXT: ret
+ %subvec = call <vscale x 4 x i8> @llvm.vector.extract.nxv4i8.nxv16i8(<vscale x 16 x i8> %a, i64 0) ; subvector starting at index 0
+ %sext = sext <vscale x 4 x i8> %subvec to <vscale x 4 x i32> ; i8 -> i32: expected to become two chained sunpklo
+ ret <vscale x 4 x i32> %sext
+}
+
+define <vscale x 4 x i32> @sext_inreg_i32_from_i16(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: sext_inreg_i32_from_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sunpklo z0.s, z0.h
+; CHECK-NEXT: ret
+ %subvec = call <vscale x 4 x i16> @llvm.vector.extract.nxv4i16.nxv8i16(<vscale x 8 x i16> %a, i64 0) ; subvector starting at index 0
+ %sext = sext <vscale x 4 x i16> %subvec to <vscale x 4 x i32> ; i16 -> i32: expected to become a single sunpklo
+ ret <vscale x 4 x i32> %sext
+}
+
+define <vscale x 2 x i64> @sext_inreg_i64_from_i8(<vscale x 16 x i8> %a) {
+; CHECK-LABEL: sext_inreg_i64_from_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sunpklo z0.h, z0.b
+; CHECK-NEXT: sunpklo z0.s, z0.h
+; CHECK-NEXT: sunpklo z0.d, z0.s
+; CHECK-NEXT: ret
+ %subvec = call <vscale x 2 x i8> @llvm.vector.extract.nxv2i8.nxv16i8(<vscale x 16 x i8> %a, i64 0) ; subvector starting at index 0
+ %sext = sext <vscale x 2 x i8> %subvec to <vscale x 2 x i64> ; i8 -> i64: expected to become three chained sunpklo
+ ret <vscale x 2 x i64> %sext
+}
+
+define <vscale x 2 x i64> @sext_inreg_i64_from_i16(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: sext_inreg_i64_from_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sunpklo z0.s, z0.h
+; CHECK-NEXT: sunpklo z0.d, z0.s
+; CHECK-NEXT: ret
+ %subvec = call <vscale x 2 x i16> @llvm.vector.extract.nxv2i16.nxv8i16(<vscale x 8 x i16> %a, i64 0) ; subvector starting at index 0
+ %sext = sext <vscale x 2 x i16> %subvec to <vscale x 2 x i64> ; i16 -> i64: expected to become two chained sunpklo
+ ret <vscale x 2 x i64> %sext
+}
+
+define <vscale x 2 x i64> @sext_inreg_i64_from_i32(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: sext_inreg_i64_from_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sunpklo z0.d, z0.s
+; CHECK-NEXT: ret
+ %subvec = call <vscale x 2 x i32> @llvm.vector.extract.nxv2i32.nxv4i32(<vscale x 4 x i32> %a, i64 0) ; subvector starting at index 0
+ %sext = sext <vscale x 2 x i32> %subvec to <vscale x 2 x i64> ; i32 -> i64: expected to become a single sunpklo
+ ret <vscale x 2 x i64> %sext
+}
+
+define <vscale x 8 x i16> @zext_inreg_i16_from_i8(<vscale x 16 x i8> %a) {
+; CHECK-LABEL: zext_inreg_i16_from_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uunpklo z0.h, z0.b
+; CHECK-NEXT: ret
+ %subvec = call <vscale x 8 x i8> @llvm.vector.extract.nxv8i8.nxv16i8(<vscale x 16 x i8> %a, i64 0) ; subvector starting at index 0
+ %zext = zext <vscale x 8 x i8> %subvec to <vscale x 8 x i16> ; i8 -> i16: expected to become a single uunpklo
+ ret <vscale x 8 x i16> %zext
+}
+
+define <vscale x 4 x i32> @zext_inreg_i32_from_i8(<vscale x 16 x i8> %a) {
+; CHECK-LABEL: zext_inreg_i32_from_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uunpklo z0.h, z0.b
+; CHECK-NEXT: uunpklo z0.s, z0.h
+; CHECK-NEXT: ret
+ %subvec = call <vscale x 4 x i8> @llvm.vector.extract.nxv4i8.nxv16i8(<vscale x 16 x i8> %a, i64 0) ; subvector starting at index 0
+ %zext = zext <vscale x 4 x i8> %subvec to <vscale x 4 x i32> ; i8 -> i32: expected to become two chained uunpklo
+ ret <vscale x 4 x i32> %zext
+}
+
+define <vscale x 4 x i32> @zext_inreg_i32_from_i16(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: zext_inreg_i32_from_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uunpklo z0.s, z0.h
+; CHECK-NEXT: ret
+ %subvec = call <vscale x 4 x i16> @llvm.vector.extract.nxv4i16.nxv8i16(<vscale x 8 x i16> %a, i64 0) ; subvector starting at index 0
+ %zext = zext <vscale x 4 x i16> %subvec to <vscale x 4 x i32> ; i16 -> i32: expected to become a single uunpklo
+ ret <vscale x 4 x i32> %zext
+}
+
+define <vscale x 2 x i64> @zext_inreg_i64_from_i8(<vscale x 16 x i8> %a) {
+; CHECK-LABEL: zext_inreg_i64_from_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uunpklo z0.h, z0.b
+; CHECK-NEXT: uunpklo z0.s, z0.h
+; CHECK-NEXT: uunpklo z0.d, z0.s
+; CHECK-NEXT: ret
+ %subvec = call <vscale x 2 x i8> @llvm.vector.extract.nxv2i8.nxv16i8(<vscale x 16 x i8> %a, i64 0) ; subvector starting at index 0
+ %zext = zext <vscale x 2 x i8> %subvec to <vscale x 2 x i64> ; i8 -> i64: expected to become three chained uunpklo
+ ret <vscale x 2 x i64> %zext
+}
+
+define <vscale x 2 x i64> @zext_inreg_i64_from_i16(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: zext_inreg_i64_from_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uunpklo z0.s, z0.h
+; CHECK-NEXT: uunpklo z0.d, z0.s
+; CHECK-NEXT: ret
+ %subvec = call <vscale x 2 x i16> @llvm.vector.extract.nxv2i16.nxv8i16(<vscale x 8 x i16> %a, i64 0) ; subvector starting at index 0
+ %zext = zext <vscale x 2 x i16> %subvec to <vscale x 2 x i64> ; i16 -> i64: expected to become two chained uunpklo
+ ret <vscale x 2 x i64> %zext
+}
+
+define <vscale x 2 x i64> @zext_inreg_i64_from_i32(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: zext_inreg_i64_from_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uunpklo z0.d, z0.s
+; CHECK-NEXT: ret
+ %subvec = call <vscale x 2 x i32> @llvm.vector.extract.nxv2i32.nxv4i32(<vscale x 4 x i32> %a, i64 0) ; subvector starting at index 0
+ %zext = zext <vscale x 2 x i32> %subvec to <vscale x 2 x i64> ; i32 -> i64: expected to become a single uunpklo
+ ret <vscale x 2 x i64> %zext
+}
More information about the llvm-commits
mailing list