[llvm] baa4983 - [AArch64] Support lowering v4i16/f16 VECTOR_COMPRESS nodes to SVE (#173256)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 24 06:26:17 PST 2025
Author: Benjamin Maxwell
Date: 2025-12-24T14:26:13Z
New Revision: baa49835da4d564bd7310474751c521bdf524d5b
URL: https://github.com/llvm/llvm-project/commit/baa49835da4d564bd7310474751c521bdf524d5b
DIFF: https://github.com/llvm/llvm-project/commit/baa49835da4d564bd7310474751c521bdf524d5b.diff
LOG: [AArch64] Support lowering v4i16/f16 VECTOR_COMPRESS nodes to SVE (#173256)
This is a follow-up to #171162, which broke the (untested) lowering of
v4i16/f16 to SVE.
See: https://github.com/llvm/llvm-project/pull/171162#discussion_r2601901376
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/sve-vector-compress.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 22c5f7dffa80d..817b00206561d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -191,6 +191,8 @@ class VectorLegalizer {
void PromoteFloatVECREDUCE(SDNode *Node, SmallVectorImpl<SDValue> &Results,
bool NonArithmetic);
+ void PromoteVECTOR_COMPRESS(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+
public:
VectorLegalizer(SelectionDAG& dag) :
DAG(dag), TLI(dag.getTargetLoweringInfo()) {}
@@ -705,6 +707,33 @@ void VectorLegalizer::PromoteFloatVECREDUCE(SDNode *Node,
Results.push_back(Res);
}
+void VectorLegalizer::PromoteVECTOR_COMPRESS(
+ SDNode *Node, SmallVectorImpl<SDValue> &Results) {
+ SDLoc DL(Node);
+ EVT VT = Node->getValueType(0);
+ MVT PromotedVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT.getSimpleVT());
+ assert((VT.isInteger() || VT.getSizeInBits() == PromotedVT.getSizeInBits()) &&
+ "Only integer promotion or bitcasts between types is supported");
+
+ SDValue Vec = Node->getOperand(0);
+ SDValue Mask = Node->getOperand(1);
+ SDValue Passthru = Node->getOperand(2);
+ if (VT.isInteger()) {
+ Vec = DAG.getNode(ISD::ANY_EXTEND, DL, PromotedVT, Vec);
+ Mask = TLI.promoteTargetBoolean(DAG, Mask, PromotedVT);
+ Passthru = DAG.getNode(ISD::ANY_EXTEND, DL, PromotedVT, Passthru);
+ } else {
+ Vec = DAG.getBitcast(PromotedVT, Vec);
+ Passthru = DAG.getBitcast(PromotedVT, Passthru);
+ }
+
+ SDValue Result =
+ DAG.getNode(ISD::VECTOR_COMPRESS, DL, PromotedVT, Vec, Mask, Passthru);
+ Result = VT.isInteger() ? DAG.getNode(ISD::TRUNCATE, DL, VT, Result)
+ : DAG.getBitcast(VT, Result);
+ Results.push_back(Result);
+}
+
void VectorLegalizer::Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
// For a few operations there is a specific concept for promotion based on
// the operand's type.
@@ -745,6 +774,10 @@ void VectorLegalizer::Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
case ISD::VECREDUCE_FMINIMUM:
PromoteFloatVECREDUCE(Node, Results, /*NonArithmetic=*/true);
return;
+ case ISD::VECTOR_COMPRESS:
+ PromoteVECTOR_COMPRESS(Node, Results);
+ return;
+
case ISD::FP_ROUND:
case ISD::FP_EXTEND:
// These operations are used to do promotion so they can't be promoted
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 30e2a7b890c6d..39d7674671876 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2003,6 +2003,12 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
MVT::v2f64})
setOperationAction(ISD::VECTOR_COMPRESS, VT, Custom);
+ // Promote v4i16/f16 to v4i32/f32 as the SVE container for v4i16 is nxv8,
+ // which is not supported for compact (with only +sve).
+ setOperationPromotedToType(ISD::VECTOR_COMPRESS, MVT::v4bf16, MVT::v4i16);
+ setOperationPromotedToType(ISD::VECTOR_COMPRESS, MVT::v4f16, MVT::v4i16);
+ setOperationPromotedToType(ISD::VECTOR_COMPRESS, MVT::v4i16, MVT::v4i32);
+
for (auto VT : {MVT::nxv2i8, MVT::nxv2i16, MVT::nxv2i32, MVT::nxv2i64,
MVT::nxv2f32, MVT::nxv2f64, MVT::nxv4i8, MVT::nxv4i16,
MVT::nxv4i32, MVT::nxv4f32}) {
diff --git a/llvm/test/CodeGen/AArch64/sve-vector-compress.ll b/llvm/test/CodeGen/AArch64/sve-vector-compress.ll
index fc46460cef758..ece7bef54ddea 100644
--- a/llvm/test/CodeGen/AArch64/sve-vector-compress.ll
+++ b/llvm/test/CodeGen/AArch64/sve-vector-compress.ll
@@ -205,6 +205,54 @@ define <4 x i32> @test_compress_v4i32_with_sve(<4 x i32> %vec, <4 x i1> %mask) {
ret <4 x i32> %out
}
+define <4 x i16> @test_compress_v4i16_with_sve(<4 x i16> %vec, <4 x i1> %mask) {
+; CHECK-LABEL: test_compress_v4i16_with_sve:
+; CHECK: // %bb.0:
+; CHECK-NEXT: shl v1.4h, v1.4h, #15
+; CHECK-NEXT: ptrue p0.s, vl4
+; CHECK-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-NEXT: cmlt v1.4h, v1.4h, #0
+; CHECK-NEXT: sshll v1.4s, v1.4h, #0
+; CHECK-NEXT: cmpne p0.s, p0/z, z1.s, #0
+; CHECK-NEXT: compact z0.s, p0, z0.s
+; CHECK-NEXT: xtn v0.4h, v0.4s
+; CHECK-NEXT: ret
+ %out = call <4 x i16> @llvm.experimental.vector.compress(<4 x i16> %vec, <4 x i1> %mask, <4 x i16> poison)
+ ret <4 x i16> %out
+}
+
+define <4 x half> @test_compress_v4f16_with_sve(<4 x half> %vec, <4 x i1> %mask) {
+; CHECK-LABEL: test_compress_v4f16_with_sve:
+; CHECK: // %bb.0:
+; CHECK-NEXT: shl v1.4h, v1.4h, #15
+; CHECK-NEXT: ptrue p0.s, vl4
+; CHECK-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-NEXT: cmlt v1.4h, v1.4h, #0
+; CHECK-NEXT: sshll v1.4s, v1.4h, #0
+; CHECK-NEXT: cmpne p0.s, p0/z, z1.s, #0
+; CHECK-NEXT: compact z0.s, p0, z0.s
+; CHECK-NEXT: xtn v0.4h, v0.4s
+; CHECK-NEXT: ret
+ %out = call <4 x half> @llvm.experimental.vector.compress(<4 x half> %vec, <4 x i1> %mask, <4 x half> poison)
+ ret <4 x half> %out
+}
+
+define <4 x bfloat> @test_compress_v4bf16_with_sve(<4 x bfloat> %vec, <4 x i1> %mask) {
+; CHECK-LABEL: test_compress_v4bf16_with_sve:
+; CHECK: // %bb.0:
+; CHECK-NEXT: shl v1.4h, v1.4h, #15
+; CHECK-NEXT: ptrue p0.s, vl4
+; CHECK-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-NEXT: cmlt v1.4h, v1.4h, #0
+; CHECK-NEXT: sshll v1.4s, v1.4h, #0
+; CHECK-NEXT: cmpne p0.s, p0/z, z1.s, #0
+; CHECK-NEXT: compact z0.s, p0, z0.s
+; CHECK-NEXT: xtn v0.4h, v0.4s
+; CHECK-NEXT: ret
+ %out = call <4 x bfloat> @llvm.experimental.vector.compress(<4 x bfloat> %vec, <4 x i1> %mask, <4 x bfloat> poison)
+ ret <4 x bfloat> %out
+}
+
define <1 x i32> @test_compress_v1i32_with_sve(<1 x i32> %vec, <1 x i1> %mask) {
; CHECK-LABEL: test_compress_v1i32_with_sve:
; CHECK: // %bb.0:
More information about the llvm-commits
mailing list