[llvm] [GlobalISel][AArch64] Legalize G_EXTRACT_SUBVECTOR for SVE (PR #114519)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 1 01:12:41 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: Thorsten Schütt (tschuett)
<details>
<summary>Changes</summary>
We use stores because return is not supported for smaller granules.
{nxv2s16, nxv4s16} fails with:
LLVM ERROR: cannot select: %0:zpr(<vscale x 4 x s16>) = G_TRUNC %2:fpr(<vscale x 4 x s32>) (in function: extract_nxv2i16_nxv4i16_1)
---
Full diff: https://github.com/llvm/llvm-project/pull/114519.diff
4 Files Affected:
- (modified) llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h (+3)
- (modified) llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp (+2)
- (modified) llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp (+30-12)
- (added) llvm/test/CodeGen/AArch64/GlobalISel/extract_subvector.ll (+55)
``````````diff
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
index 471a7f70dd546c..a61943f29d18fb 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
@@ -267,6 +267,9 @@ class LegalizationArtifactCombiner {
const LLT DstTy = MRI.getType(DstReg);
Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());
+ if (DstTy.isScalableVector())
+ return false;
+
// Try to fold trunc(g_constant) when the smaller constant type is legal.
auto *SrcMI = MRI.getVRegDef(SrcReg);
if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) {
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp b/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
index b7541effafe5ce..93e716a22814ca 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
@@ -196,6 +196,8 @@ LegalityPredicate LegalityPredicates::sameSize(unsigned TypeIdx0,
LegalityPredicate LegalityPredicates::memSizeInBytesNotPow2(unsigned MMOIdx) {
return [=](const LegalityQuery &Query) {
+ if (Query.MMODescrs[MMOIdx].MemoryTy.isScalableVector())
+ return true;
return !llvm::has_single_bit<uint32_t>(
Query.MMODescrs[MMOIdx].MemoryTy.getSizeInBytes());
};
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index f162d1c2973cbc..4a1f3555584fcb 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -61,11 +61,19 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
const LLT v2s64 = LLT::fixed_vector(2, 64);
const LLT v2p0 = LLT::fixed_vector(2, p0);
+ // 128 bit
const LLT nxv16s8 = LLT::scalable_vector(16, s8);
const LLT nxv8s16 = LLT::scalable_vector(8, s16);
const LLT nxv4s32 = LLT::scalable_vector(4, s32);
const LLT nxv2s64 = LLT::scalable_vector(2, s64);
+ // 64 bit
+ const LLT nxv4s16 = LLT::scalable_vector(4, s16);
+ const LLT nxv2s32 = LLT::scalable_vector(2, s32);
+
+ // 32 bit
+ const LLT nxv2s16 = LLT::scalable_vector(2, s16);
+
std::initializer_list<LLT> PackedVectorAllTypeList = {/* Begin 128bit types */
v16s8, v8s16, v4s32,
v2s64, v2p0,
@@ -442,16 +450,20 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
{p0, p0, s64, 8}, {s128, p0, s128, 8}, {v16s8, p0, s128, 8},
{v8s8, p0, s64, 8}, {v4s16, p0, s64, 8}, {v8s16, p0, s128, 8},
{v2s32, p0, s64, 8}, {v4s32, p0, s128, 8}, {v2s64, p0, s128, 8}})
- .legalForTypesWithMemDesc({
- // SVE vscale x 128 bit base sizes
- // TODO: Add nxv2p0. Consider bitcastIf.
- // See #92130
- // https://github.com/llvm/llvm-project/pull/92130#discussion_r1616888461
- {nxv16s8, p0, nxv16s8, 8},
- {nxv8s16, p0, nxv8s16, 8},
- {nxv4s32, p0, nxv4s32, 8},
- {nxv2s64, p0, nxv2s64, 8},
- })
+ .legalForTypesWithMemDesc(
+ {// SVE vscale x 128 bit base sizes
+ // TODO: Add nxv2p0. Consider bitcastIf.
+ // See #92130
+ // https://github.com/llvm/llvm-project/pull/92130#discussion_r1616888461
+ {nxv16s8, p0, nxv16s8, 8},
+ {nxv8s16, p0, nxv8s16, 8},
+ {nxv4s32, p0, nxv4s32, 8},
+ {nxv2s64, p0, nxv2s64, 8},
+ // SVE vscale x 64 bit base sizes
+ {nxv2s32, p0, nxv2s32, 8},
+ {nxv4s16, p0, nxv4s16, 8},
+ // SVE vscale x 32 bit base sizes
+ {nxv2s16, p0, nxv2s16, 8}})
.clampScalar(0, s8, s64)
.lowerIf([=](const LegalityQuery &Query) {
return Query.Types[0].isScalar() &&
@@ -639,17 +651,20 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder(G_TRUNC)
.legalFor({{v2s32, v2s64}, {v4s16, v4s32}, {v8s8, v8s16}})
+ .legalFor(HasSVE, {{nxv4s16, nxv4s32}})
.moreElementsToNextPow2(0)
.clampMaxNumElements(0, s8, 8)
.clampMaxNumElements(0, s16, 4)
.clampMaxNumElements(0, s32, 2)
.minScalarOrEltIf(
- [=](const LegalityQuery &Query) { return Query.Types[0].isVector(); },
+ [=](const LegalityQuery &Query) {
+ return Query.Types[0].isFixedVector();
+ },
0, s8)
.lowerIf([=](const LegalityQuery &Query) {
LLT DstTy = Query.Types[0];
LLT SrcTy = Query.Types[1];
- return DstTy.isVector() && SrcTy.getSizeInBits() > 128 &&
+ return DstTy.isFixedVector() && SrcTy.getSizeInBits() > 128 &&
DstTy.getScalarSizeInBits() * 2 <= SrcTy.getScalarSizeInBits();
})
.clampMinNumElements(0, s8, 8)
@@ -1315,8 +1330,11 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder({G_SCMP, G_UCMP}).lower();
+ // FIXME: {nxv2s16, nxv4s16}
getActionDefinitionsBuilder(G_EXTRACT_SUBVECTOR)
.legalFor({{v8s8, v16s8}, {v4s16, v8s16}, {v2s32, v4s32}})
+ .legalFor(HasSVE,
+ {{nxv2s16, nxv8s16}, {nxv4s16, nxv8s16}, {nxv2s32, nxv4s32}})
.widenScalarOrEltToNextPow2(0)
.immIdx(0); // Inform verifier imm idx 0 is handled.
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/extract_subvector.ll b/llvm/test/CodeGen/AArch64/GlobalISel/extract_subvector.ll
new file mode 100644
index 00000000000000..ab302071b815a3
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/extract_subvector.ll
@@ -0,0 +1,55 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple aarch64 -mattr=+sve | FileCheck %s
+; RUN: llc < %s -mtriple aarch64 -mattr=+sve -global-isel -aarch64-enable-gisel-sve=1 | FileCheck %s
+
+;; RUN: llc -global-isel -mtriple=aarch64-linux-gnu -mattr=+sve -O0 -aarch64-enable-gisel-sve=1 -stop-after=irtranslator < %s | FileCheck %s
+
+define void @extract_nxv2i32_nxv4i32_1(<vscale x 4 x i32> %arg, ptr %p) {
+; CHECK-LABEL: extract_nxv2i32_nxv4i32_1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uunpklo z0.d, z0.s
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: st1w { z0.d }, p0, [x0]
+; CHECK-NEXT: ret
+ %ext = call <vscale x 2 x i32> @llvm.vector.extract.nxv2i32.nxv4i32(<vscale x 4 x i32> %arg, i64 0)
+ store <vscale x 2 x i32> %ext, ptr %p
+ ret void
+}
+
+define void @extract_nxv4i16_nxv8i16_1(<vscale x 8 x i16> %arg, ptr %p) {
+; CHECK-LABEL: extract_nxv4i16_nxv8i16_1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uunpklo z0.s, z0.h
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: st1h { z0.s }, p0, [x0]
+; CHECK-NEXT: ret
+ %ext = call <vscale x 4 x i16> @llvm.vector.extract.nxv4i16.nxv8i16(<vscale x 8 x i16> %arg, i64 0)
+ store <vscale x 4 x i16> %ext, ptr %p
+ ret void
+}
+
+define void @extract_nxv2i16_nxv8i16_1(<vscale x 8 x i16> %arg, ptr %p) {
+; CHECK-LABEL: extract_nxv2i16_nxv8i16_1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uunpklo z0.s, z0.h
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: uunpkhi z0.d, z0.s
+; CHECK-NEXT: st1h { z0.d }, p0, [x0]
+; CHECK-NEXT: ret
+ %ext = call <vscale x 2 x i16> @llvm.vector.extract.nxv2i16.nxv8i16(<vscale x 8 x i16> %arg, i64 2)
+ store <vscale x 2 x i16> %ext, ptr %p
+ ret void
+}
+
+define void @extract_nxv2i16_nxv8i16(<vscale x 8 x i16> %arg, ptr %p) {
+; CHECK-LABEL: extract_nxv2i16_nxv8i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uunpklo z0.s, z0.h
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: uunpklo z0.d, z0.s
+; CHECK-NEXT: st1h { z0.d }, p0, [x0]
+; CHECK-NEXT: ret
+ %ext = call <vscale x 2 x i16> @llvm.vector.extract.nxv2i16.nxv8i16(<vscale x 8 x i16> %arg, i64 0)
+ store <vscale x 2 x i16> %ext, ptr %p
+ ret void
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/114519
More information about the llvm-commits
mailing list