[llvm] [GlobalISel][AArch64] Legalize G_EXTRACT_VECTOR_ELT for SVE (PR #115161)
Thorsten Schütt via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 7 11:58:54 PST 2024
https://github.com/tschuett updated https://github.com/llvm/llvm-project/pull/115161
>From cf13f98d17b016fa8f23c53c7390b694e6cbedde Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= <schuett at gmail.com>
Date: Sat, 2 Nov 2024 09:17:36 +0100
Subject: [PATCH] [GlobalISel][AArch64] Legalize G_EXTRACT_VECTOR_ELT for SVE
AArch64InstrGISel.td defines:
def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>;
There are many patterns for SVE. Let's exploit that fact.
---
llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 6 +-
.../GISel/AArch64InstructionSelector.cpp | 4 +-
.../AArch64/GISel/AArch64LegalizerInfo.cpp | 26 ++++-
.../CodeGen/AArch64/extract-vector-elt-sve.ll | 105 ++++++++++++++++++
4 files changed, 132 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index a87754389cc8ed..8cd3fa5f432b6e 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -3227,8 +3227,10 @@ bool IRTranslator::translateExtractElement(const User &U,
MachineIRBuilder &MIRBuilder) {
// If it is a <1 x Ty> vector, use the scalar as it is
// not a legal vector type in LLT.
- if (cast<FixedVectorType>(U.getOperand(0)->getType())->getNumElements() == 1)
- return translateCopy(U, *U.getOperand(0), MIRBuilder);
+ if (const FixedVectorType *FVT =
+ dyn_cast<FixedVectorType>(U.getOperand(0)->getType()))
+ if (FVT->getNumElements() == 1)
+ return translateCopy(U, *U.getOperand(0), MIRBuilder);
Register Res = getOrCreateVReg(U);
Register Val = getOrCreateVReg(*U.getOperand(0));
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 9502b1d10f9a2b..663117c6b85bf7 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -5316,7 +5316,9 @@ bool AArch64InstructionSelector::selectUSMovFromExtend(
return false;
Register Src0 = Extract->getOperand(1).getReg();
- const LLT &VecTy = MRI.getType(Src0);
+ const LLT VecTy = MRI.getType(Src0);
+ if (VecTy.isScalableVector())
+ return false;
if (VecTy.getSizeInBits() != 128) {
const MachineInstr *ScalarToVector = emitScalarToVector(
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index f7ca0ca65ac42b..3677cfdaba3b21 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -933,9 +933,16 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
});
}
+ // TODO : nxv4s16, nxv2s16, nxv2s32
getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
+ .legalFor(HasSVE, {{s16, nxv16s8, s64},
+ {s16, nxv8s16, s64},
+ {s32, nxv4s32, s64},
+ {s64, nxv2s64, s64}})
.unsupportedIf([=](const LegalityQuery &Query) {
const LLT &EltTy = Query.Types[1].getElementType();
+ if (Query.Types[1].isScalableVector())
+ return false;
return Query.Types[0] != EltTy;
})
.minScalar(2, s64)
@@ -949,22 +956,26 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
[=](const LegalityQuery &Query) {
// We want to promote to <M x s1> to <M x s64> if that wouldn't
// cause the total vec size to be > 128b.
- return Query.Types[1].getNumElements() <= 2;
+ return Query.Types[1].isFixedVector() &&
+ Query.Types[1].getNumElements() <= 2;
},
0, s64)
.minScalarOrEltIf(
[=](const LegalityQuery &Query) {
- return Query.Types[1].getNumElements() <= 4;
+ return Query.Types[1].isFixedVector() &&
+ Query.Types[1].getNumElements() <= 4;
},
0, s32)
.minScalarOrEltIf(
[=](const LegalityQuery &Query) {
- return Query.Types[1].getNumElements() <= 8;
+ return Query.Types[1].isFixedVector() &&
+ Query.Types[1].getNumElements() <= 8;
},
0, s16)
.minScalarOrEltIf(
[=](const LegalityQuery &Query) {
- return Query.Types[1].getNumElements() <= 16;
+ return Query.Types[1].isFixedVector() &&
+ Query.Types[1].getNumElements() <= 16;
},
0, s8)
.minScalarOrElt(0, s8) // Worst case, we need at least s8.
@@ -2178,11 +2189,14 @@ bool AArch64LegalizerInfo::legalizeMemOps(MachineInstr &MI,
bool AArch64LegalizerInfo::legalizeExtractVectorElt(
MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
- assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
+ const GExtractVectorElement *Element = cast<GExtractVectorElement>(&MI);
auto VRegAndVal =
- getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
+ getIConstantVRegValWithLookThrough(Element->getIndexReg(), MRI);
if (VRegAndVal)
return true;
+ LLT VecTy = MRI.getType(Element->getVectorReg());
+ if (VecTy.isScalableVector())
+ return true;
return Helper.lowerExtractInsertVectorElt(MI) !=
LegalizerHelper::LegalizeResult::UnableToLegalize;
}
diff --git a/llvm/test/CodeGen/AArch64/extract-vector-elt-sve.ll b/llvm/test/CodeGen/AArch64/extract-vector-elt-sve.ll
index 75c8f8923c3815..d18af3d5ae9450 100644
--- a/llvm/test/CodeGen/AArch64/extract-vector-elt-sve.ll
+++ b/llvm/test/CodeGen/AArch64/extract-vector-elt-sve.ll
@@ -121,3 +121,108 @@ entry:
%d = insertelement <vscale x 16 x i8> %vec, i8 %elt, i64 %idx
ret <vscale x 16 x i8> %d
}
+
+define i64 @extract_vscale_2_i64(<vscale x 2 x i64> %vec, i64 %idx) {
+; CHECK-SD-LABEL: extract_vscale_2_i64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: whilels p0.d, xzr, x0
+; CHECK-SD-NEXT: lastb x0, p0, z0.d
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: extract_vscale_2_i64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: whilels p0.d, xzr, x0
+; CHECK-GI-NEXT: lastb d0, p0, z0.d
+; CHECK-GI-NEXT: fmov x0, d0
+; CHECK-GI-NEXT: ret
+entry:
+ %d = extractelement <vscale x 2 x i64> %vec, i64 %idx
+ ret i64 %d
+}
+
+define i64 @extract_vscale_2_i64_zero(<vscale x 2 x i64> %vec, i64 %idx) {
+; CHECK-LABEL: extract_vscale_2_i64_zero:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ret
+entry:
+ %d = extractelement <vscale x 2 x i64> %vec, i64 0
+ ret i64 %d
+}
+
+define i32 @extract_vscale_4_i32(<vscale x 4 x i32> %vec, i64 %idx) {
+; CHECK-SD-LABEL: extract_vscale_4_i32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: whilels p0.s, xzr, x0
+; CHECK-SD-NEXT: lastb w0, p0, z0.s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: extract_vscale_4_i32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: whilels p0.s, xzr, x0
+; CHECK-GI-NEXT: lastb s0, p0, z0.s
+; CHECK-GI-NEXT: fmov w0, s0
+; CHECK-GI-NEXT: ret
+entry:
+ %d = extractelement <vscale x 4 x i32> %vec, i64 %idx
+ ret i32 %d
+}
+
+define i32 @extract_vscale_4_i32_zero(<vscale x 4 x i32> %vec, i64 %idx) {
+; CHECK-LABEL: extract_vscale_4_i32_zero:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
+entry:
+ %d = extractelement <vscale x 4 x i32> %vec, i64 0
+ ret i32 %d
+}
+
+define i16 @extract_vscale_8_i16(<vscale x 8 x i16> %vec, i64 %idx) {
+; CHECK-SD-LABEL: extract_vscale_8_i16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: whilels p0.h, xzr, x0
+; CHECK-SD-NEXT: lastb w0, p0, z0.h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: extract_vscale_8_i16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: whilels p0.h, xzr, x0
+; CHECK-GI-NEXT: lastb h0, p0, z0.h
+; CHECK-GI-NEXT: fmov w0, s0
+; CHECK-GI-NEXT: ret
+entry:
+ %d = extractelement <vscale x 8 x i16> %vec, i64 %idx
+ ret i16 %d
+}
+
+define i16 @extract_vscale_8_i16_zero(<vscale x 8 x i16> %vec, i64 %idx) {
+; CHECK-LABEL: extract_vscale_8_i16_zero:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
+entry:
+ %d = extractelement <vscale x 8 x i16> %vec, i64 0
+ ret i16 %d
+}
+
+define i8 @extract_vscale_16_i8(<vscale x 16 x i8> %vec, i64 %idx) {
+; CHECK-LABEL: extract_vscale_16_i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: whilels p0.b, xzr, x0
+; CHECK-NEXT: lastb w0, p0, z0.b
+; CHECK-NEXT: ret
+entry:
+ %d = extractelement <vscale x 16 x i8> %vec, i64 %idx
+ ret i8 %d
+}
+
+define i8 @extract_vscale_16_i8_zero(<vscale x 16 x i8> %vec, i64 %idx) {
+; CHECK-LABEL: extract_vscale_16_i8_zero:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
+entry:
+ %d = extractelement <vscale x 16 x i8> %vec, i64 0
+ ret i8 %d
+}
More information about the llvm-commits
mailing list