[llvm] [AArch64] Support lowering v4i16/f16 VECTOR_COMPRESS nodes to SVE (PR #173256)
Benjamin Maxwell via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 22 04:58:29 PST 2025
https://github.com/MacDue created https://github.com/llvm/llvm-project/pull/173256
This is a follow-up to #171162, which broke the (untested) lowering of v4i16/f16 to SVE.
See https://github.com/llvm/llvm-project/pull/171162#discussion_r2601901376
From df2a65094b580ab1b7adf7c2bc744172f56c1471 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Mon, 22 Dec 2025 12:49:33 +0000
Subject: [PATCH] [AArch64] Support lowering v4i16/f16 VECTOR_COMPRESS nodes to
SVE
This is a follow-up to #171162, which broke the (untested) lowering
of v4i16/f16 to SVE.
See https://github.com/llvm/llvm-project/pull/171162#discussion_r2601901376
---
.../SelectionDAG/LegalizeVectorOps.cpp | 33 +++++++++++++++++++
.../Target/AArch64/AArch64ISelLowering.cpp | 5 +++
.../CodeGen/AArch64/sve-vector-compress.ll | 32 ++++++++++++++++++
3 files changed, 70 insertions(+)
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 22c5f7dffa80d..0cb45ea93346e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -191,6 +191,8 @@ class VectorLegalizer {
void PromoteFloatVECREDUCE(SDNode *Node, SmallVectorImpl<SDValue> &Results,
bool NonArithmetic);
+ void PromoteVECTOR_COMPRESS(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+
public:
VectorLegalizer(SelectionDAG& dag) :
DAG(dag), TLI(dag.getTargetLoweringInfo()) {}
@@ -705,6 +707,33 @@ void VectorLegalizer::PromoteFloatVECREDUCE(SDNode *Node,
Results.push_back(Res);
}
+void VectorLegalizer::PromoteVECTOR_COMPRESS(
+ SDNode *Node, SmallVectorImpl<SDValue> &Results) {
+ SDLoc DL(Node);
+ EVT VT = Node->getValueType(0);
+ EVT IntVT = VT.changeVectorElementTypeToInteger();
+ MVT PromotedVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT.getSimpleVT());
+ assert(PromotedVT.isInteger() && "Only promotion to integers supported");
+
+ SDValue Vec = Node->getOperand(0);
+ SDValue Mask = TLI.promoteTargetBoolean(DAG, Node->getOperand(1), PromotedVT);
+ SDValue Passthru = Node->getOperand(2);
+ if (VT.isFloatingPoint()) {
+ Vec = DAG.getBitcast(IntVT, Vec);
+ Passthru = DAG.getBitcast(IntVT, Passthru);
+ }
+ Vec = DAG.getNode(ISD::ANY_EXTEND, DL, PromotedVT, Vec);
+ Passthru = DAG.getNode(ISD::ANY_EXTEND, DL, PromotedVT, Passthru);
+
+ SDValue Result =
+ DAG.getNode(ISD::VECTOR_COMPRESS, DL, PromotedVT, Vec, Mask, Passthru);
+ Result = DAG.getNode(ISD::TRUNCATE, DL, IntVT, Result);
+
+ if (VT.isFloatingPoint())
+ Result = DAG.getBitcast(VT, Result);
+ Results.push_back(Result);
+}
+
void VectorLegalizer::Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
// For a few operations there is a specific concept for promotion based on
// the operand's type.
@@ -745,6 +774,10 @@ void VectorLegalizer::Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
case ISD::VECREDUCE_FMINIMUM:
PromoteFloatVECREDUCE(Node, Results, /*NonArithmetic=*/true);
return;
+ case ISD::VECTOR_COMPRESS:
+ PromoteVECTOR_COMPRESS(Node, Results);
+ return;
+
case ISD::FP_ROUND:
case ISD::FP_EXTEND:
// These operations are used to do promotion so they can't be promoted
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 8c56071c5527d..41234566dcf01 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2003,6 +2003,11 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
MVT::v2f64})
setOperationAction(ISD::VECTOR_COMPRESS, VT, Custom);
+ // Promote v4i16/f16 to v4i32, as the SVE container for v4i16/f16 is nxv8,
+ // which is not supported for compact (with only +sve).
+ setOperationPromotedToType(ISD::VECTOR_COMPRESS, MVT::v4i16, MVT::v4i32);
+ setOperationPromotedToType(ISD::VECTOR_COMPRESS, MVT::v4f16, MVT::v4i32);
+
for (auto VT : {MVT::nxv2i8, MVT::nxv2i16, MVT::nxv2i32, MVT::nxv2i64,
MVT::nxv2f32, MVT::nxv2f64, MVT::nxv4i8, MVT::nxv4i16,
MVT::nxv4i32, MVT::nxv4f32}) {
diff --git a/llvm/test/CodeGen/AArch64/sve-vector-compress.ll b/llvm/test/CodeGen/AArch64/sve-vector-compress.ll
index fc46460cef758..034db474d9a41 100644
--- a/llvm/test/CodeGen/AArch64/sve-vector-compress.ll
+++ b/llvm/test/CodeGen/AArch64/sve-vector-compress.ll
@@ -205,6 +205,38 @@ define <4 x i32> @test_compress_v4i32_with_sve(<4 x i32> %vec, <4 x i1> %mask) {
ret <4 x i32> %out
}
+define <4 x i16> @test_compress_v4i16_with_sve(<4 x i16> %vec, <4 x i1> %mask) {
+; CHECK-LABEL: test_compress_v4i16_with_sve:
+; CHECK: // %bb.0:
+; CHECK-NEXT: shl v1.4h, v1.4h, #15
+; CHECK-NEXT: ptrue p0.s, vl4
+; CHECK-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-NEXT: cmlt v1.4h, v1.4h, #0
+; CHECK-NEXT: sshll v1.4s, v1.4h, #0
+; CHECK-NEXT: cmpne p0.s, p0/z, z1.s, #0
+; CHECK-NEXT: compact z0.s, p0, z0.s
+; CHECK-NEXT: xtn v0.4h, v0.4s
+; CHECK-NEXT: ret
+ %out = call <4 x i16> @llvm.experimental.vector.compress(<4 x i16> %vec, <4 x i1> %mask, <4 x i16> poison)
+ ret <4 x i16> %out
+}
+
+define <4 x half> @test_compress_v4f16_with_sve(<4 x half> %vec, <4 x i1> %mask) {
+; CHECK-LABEL: test_compress_v4f16_with_sve:
+; CHECK: // %bb.0:
+; CHECK-NEXT: shl v1.4h, v1.4h, #15
+; CHECK-NEXT: ptrue p0.s, vl4
+; CHECK-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-NEXT: cmlt v1.4h, v1.4h, #0
+; CHECK-NEXT: sshll v1.4s, v1.4h, #0
+; CHECK-NEXT: cmpne p0.s, p0/z, z1.s, #0
+; CHECK-NEXT: compact z0.s, p0, z0.s
+; CHECK-NEXT: xtn v0.4h, v0.4s
+; CHECK-NEXT: ret
+ %out = call <4 x half> @llvm.experimental.vector.compress(<4 x half> %vec, <4 x i1> %mask, <4 x half> poison)
+ ret <4 x half> %out
+}
+
define <1 x i32> @test_compress_v1i32_with_sve(<1 x i32> %vec, <1 x i1> %mask) {
; CHECK-LABEL: test_compress_v1i32_with_sve:
; CHECK: // %bb.0:
More information about the llvm-commits
mailing list