[llvm] [GlobalISel][AArch64] Legalize G_EXTRACT_SUBVECTOR for SVE (PR #114519)
Thorsten Schütt via llvm-commits
llvm-commits at lists.llvm.org
Sat Nov 2 12:45:09 PDT 2024
https://github.com/tschuett updated https://github.com/llvm/llvm-project/pull/114519
>From 96ca3c8818916da6912449846121e13987e12481 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= <schuett at gmail.com>
Date: Fri, 1 Nov 2024 09:10:04 +0100
Subject: [PATCH 1/4] [GlobalISel][AArch64] Legalize G_EXTRACT_SUBVECTOR for
SVE
The tests store the extracted subvector because returning values is not supported for the smaller granules.
{nxv2s16, nxv4s16} fails with:
LLVM ERROR: cannot select: %0:zpr(<vscale x 4 x s16>) = G_TRUNC %2:fpr(<vscale x 4 x s32>) (in function: extract_nxv2i16_nxv4i16_1)
---
.../GlobalISel/LegalizationArtifactCombiner.h | 3 +
.../CodeGen/GlobalISel/LegalityPredicates.cpp | 2 +
.../AArch64/GISel/AArch64LegalizerInfo.cpp | 42 ++++++++++----
.../AArch64/GlobalISel/extract_subvector.ll | 55 +++++++++++++++++++
4 files changed, 90 insertions(+), 12 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/extract_subvector.ll
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
index 471a7f70dd546c..a61943f29d18fb 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
@@ -267,6 +267,9 @@ class LegalizationArtifactCombiner {
const LLT DstTy = MRI.getType(DstReg);
Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());
+ if (DstTy.isScalableVector())
+ return false;
+
// Try to fold trunc(g_constant) when the smaller constant type is legal.
auto *SrcMI = MRI.getVRegDef(SrcReg);
if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) {
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp b/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
index b7541effafe5ce..93e716a22814ca 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
@@ -196,6 +196,8 @@ LegalityPredicate LegalityPredicates::sameSize(unsigned TypeIdx0,
LegalityPredicate LegalityPredicates::memSizeInBytesNotPow2(unsigned MMOIdx) {
return [=](const LegalityQuery &Query) {
+ if (Query.MMODescrs[MMOIdx].MemoryTy.isScalableVector())
+ return true;
return !llvm::has_single_bit<uint32_t>(
Query.MMODescrs[MMOIdx].MemoryTy.getSizeInBytes());
};
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index f162d1c2973cbc..4a1f3555584fcb 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -61,11 +61,19 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
const LLT v2s64 = LLT::fixed_vector(2, 64);
const LLT v2p0 = LLT::fixed_vector(2, p0);
+ // 128 bit
const LLT nxv16s8 = LLT::scalable_vector(16, s8);
const LLT nxv8s16 = LLT::scalable_vector(8, s16);
const LLT nxv4s32 = LLT::scalable_vector(4, s32);
const LLT nxv2s64 = LLT::scalable_vector(2, s64);
+ // 64 bit
+ const LLT nxv4s16 = LLT::scalable_vector(4, s16);
+ const LLT nxv2s32 = LLT::scalable_vector(2, s32);
+
+ // 32 bit
+ const LLT nxv2s16 = LLT::scalable_vector(2, s16);
+
std::initializer_list<LLT> PackedVectorAllTypeList = {/* Begin 128bit types */
v16s8, v8s16, v4s32,
v2s64, v2p0,
@@ -442,16 +450,20 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
{p0, p0, s64, 8}, {s128, p0, s128, 8}, {v16s8, p0, s128, 8},
{v8s8, p0, s64, 8}, {v4s16, p0, s64, 8}, {v8s16, p0, s128, 8},
{v2s32, p0, s64, 8}, {v4s32, p0, s128, 8}, {v2s64, p0, s128, 8}})
- .legalForTypesWithMemDesc({
- // SVE vscale x 128 bit base sizes
- // TODO: Add nxv2p0. Consider bitcastIf.
- // See #92130
- // https://github.com/llvm/llvm-project/pull/92130#discussion_r1616888461
- {nxv16s8, p0, nxv16s8, 8},
- {nxv8s16, p0, nxv8s16, 8},
- {nxv4s32, p0, nxv4s32, 8},
- {nxv2s64, p0, nxv2s64, 8},
- })
+ .legalForTypesWithMemDesc(
+ {// SVE vscale x 128 bit base sizes
+ // TODO: Add nxv2p0. Consider bitcastIf.
+ // See #92130
+ // https://github.com/llvm/llvm-project/pull/92130#discussion_r1616888461
+ {nxv16s8, p0, nxv16s8, 8},
+ {nxv8s16, p0, nxv8s16, 8},
+ {nxv4s32, p0, nxv4s32, 8},
+ {nxv2s64, p0, nxv2s64, 8},
+ // SVE vscale x 64 bit base sizes
+ {nxv2s32, p0, nxv2s32, 8},
+ {nxv4s16, p0, nxv4s16, 8},
+ // SVE vscale x 32 bit base sizes
+ {nxv2s16, p0, nxv2s16, 8}})
.clampScalar(0, s8, s64)
.lowerIf([=](const LegalityQuery &Query) {
return Query.Types[0].isScalar() &&
@@ -639,17 +651,20 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder(G_TRUNC)
.legalFor({{v2s32, v2s64}, {v4s16, v4s32}, {v8s8, v8s16}})
+ .legalFor(HasSVE, {{nxv4s16, nxv4s32}})
.moreElementsToNextPow2(0)
.clampMaxNumElements(0, s8, 8)
.clampMaxNumElements(0, s16, 4)
.clampMaxNumElements(0, s32, 2)
.minScalarOrEltIf(
- [=](const LegalityQuery &Query) { return Query.Types[0].isVector(); },
+ [=](const LegalityQuery &Query) {
+ return Query.Types[0].isFixedVector();
+ },
0, s8)
.lowerIf([=](const LegalityQuery &Query) {
LLT DstTy = Query.Types[0];
LLT SrcTy = Query.Types[1];
- return DstTy.isVector() && SrcTy.getSizeInBits() > 128 &&
+ return DstTy.isFixedVector() && SrcTy.getSizeInBits() > 128 &&
DstTy.getScalarSizeInBits() * 2 <= SrcTy.getScalarSizeInBits();
})
.clampMinNumElements(0, s8, 8)
@@ -1315,8 +1330,11 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder({G_SCMP, G_UCMP}).lower();
+ // FIXME: {nxv2s16, nxv4s16}
getActionDefinitionsBuilder(G_EXTRACT_SUBVECTOR)
.legalFor({{v8s8, v16s8}, {v4s16, v8s16}, {v2s32, v4s32}})
+ .legalFor(HasSVE,
+ {{nxv2s16, nxv8s16}, {nxv4s16, nxv8s16}, {nxv2s32, nxv4s32}})
.widenScalarOrEltToNextPow2(0)
.immIdx(0); // Inform verifier imm idx 0 is handled.
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/extract_subvector.ll b/llvm/test/CodeGen/AArch64/GlobalISel/extract_subvector.ll
new file mode 100644
index 00000000000000..ab302071b815a3
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/extract_subvector.ll
@@ -0,0 +1,55 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple aarch64 -mattr=+sve | FileCheck %s
+; RUN: llc < %s -mtriple aarch64 -mattr=+sve -global-isel -aarch64-enable-gisel-sve=1 | FileCheck %s
+
+;; RUN: llc -global-isel -mtriple=aarch64-linux-gnu -mattr=+sve -O0 -aarch64-enable-gisel-sve=1 -stop-after=irtranslator < %s | FileCheck %s
+
+define void @extract_nxv2i32_nxv4i32_1(<vscale x 4 x i32> %arg, ptr %p) {
+; CHECK-LABEL: extract_nxv2i32_nxv4i32_1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uunpklo z0.d, z0.s
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: st1w { z0.d }, p0, [x0]
+; CHECK-NEXT: ret
+ %ext = call <vscale x 2 x i32> @llvm.vector.extract.nxv2i32.nxv4i32(<vscale x 4 x i32> %arg, i64 0)
+ store <vscale x 2 x i32> %ext, ptr %p
+ ret void
+}
+
+define void @extract_nxv4i16_nxv8i16_1(<vscale x 8 x i16> %arg, ptr %p) {
+; CHECK-LABEL: extract_nxv4i16_nxv8i16_1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uunpklo z0.s, z0.h
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: st1h { z0.s }, p0, [x0]
+; CHECK-NEXT: ret
+ %ext = call <vscale x 4 x i16> @llvm.vector.extract.nxv4i16.nxv8i16(<vscale x 8 x i16> %arg, i64 0)
+ store <vscale x 4 x i16> %ext, ptr %p
+ ret void
+}
+
+define void @extract_nxv2i16_nxv8i16_1(<vscale x 8 x i16> %arg, ptr %p) {
+; CHECK-LABEL: extract_nxv2i16_nxv8i16_1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uunpklo z0.s, z0.h
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: uunpkhi z0.d, z0.s
+; CHECK-NEXT: st1h { z0.d }, p0, [x0]
+; CHECK-NEXT: ret
+ %ext = call <vscale x 2 x i16> @llvm.vector.extract.nxv2i16.nxv8i16(<vscale x 8 x i16> %arg, i64 2)
+ store <vscale x 2 x i16> %ext, ptr %p
+ ret void
+}
+
+define void @extract_nxv2i16_nxv8i16(<vscale x 8 x i16> %arg, ptr %p) {
+; CHECK-LABEL: extract_nxv2i16_nxv8i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uunpklo z0.s, z0.h
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: uunpklo z0.d, z0.s
+; CHECK-NEXT: st1h { z0.d }, p0, [x0]
+; CHECK-NEXT: ret
+ %ext = call <vscale x 2 x i16> @llvm.vector.extract.nxv2i16.nxv8i16(<vscale x 8 x i16> %arg, i64 0)
+ store <vscale x 2 x i16> %ext, ptr %p
+ ret void
+}
>From 98c5f06041492a50a4802c656de32dce37e05b4b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= <schuett at gmail.com>
Date: Fri, 1 Nov 2024 10:05:01 +0100
Subject: [PATCH 2/4] cleanup test
---
.../AArch64/GlobalISel/extract_subvector.ll | 14 ++++++--------
1 file changed, 6 insertions(+), 8 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/extract_subvector.ll b/llvm/test/CodeGen/AArch64/GlobalISel/extract_subvector.ll
index ab302071b815a3..d9032bd9f38c40 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/extract_subvector.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/extract_subvector.ll
@@ -2,10 +2,8 @@
; RUN: llc < %s -mtriple aarch64 -mattr=+sve | FileCheck %s
; RUN: llc < %s -mtriple aarch64 -mattr=+sve -global-isel -aarch64-enable-gisel-sve=1 | FileCheck %s
-;; RUN: llc -global-isel -mtriple=aarch64-linux-gnu -mattr=+sve -O0 -aarch64-enable-gisel-sve=1 -stop-after=irtranslator < %s | FileCheck %s
-
-define void @extract_nxv2i32_nxv4i32_1(<vscale x 4 x i32> %arg, ptr %p) {
-; CHECK-LABEL: extract_nxv2i32_nxv4i32_1:
+define void @extract_nxv2i32_nxv4i32(<vscale x 4 x i32> %arg, ptr %p) {
+; CHECK-LABEL: extract_nxv2i32_nxv4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: ptrue p0.d
@@ -16,8 +14,8 @@ define void @extract_nxv2i32_nxv4i32_1(<vscale x 4 x i32> %arg, ptr %p) {
ret void
}
-define void @extract_nxv4i16_nxv8i16_1(<vscale x 8 x i16> %arg, ptr %p) {
-; CHECK-LABEL: extract_nxv4i16_nxv8i16_1:
+define void @extract_nxv4i16_nxv8i16(<vscale x 8 x i16> %arg, ptr %p) {
+; CHECK-LABEL: extract_nxv4i16_nxv8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: ptrue p0.s
@@ -28,8 +26,8 @@ define void @extract_nxv4i16_nxv8i16_1(<vscale x 8 x i16> %arg, ptr %p) {
ret void
}
-define void @extract_nxv2i16_nxv8i16_1(<vscale x 8 x i16> %arg, ptr %p) {
-; CHECK-LABEL: extract_nxv2i16_nxv8i16_1:
+define void @extract_nxv2i16_nxv8i16_2(<vscale x 8 x i16> %arg, ptr %p) {
+; CHECK-LABEL: extract_nxv2i16_nxv8i16_2:
; CHECK: // %bb.0:
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: ptrue p0.d
>From 7ff8d2a67b9f17e9cf378f511d3370dbf7b274f0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= <schuett at gmail.com>
Date: Fri, 1 Nov 2024 16:57:45 +0100
Subject: [PATCH 3/4] address review comment
---
llvm/test/CodeGen/AArch64/{GlobalISel => }/extract_subvector.ll | 0
1 file changed, 0 insertions(+), 0 deletions(-)
rename llvm/test/CodeGen/AArch64/{GlobalISel => }/extract_subvector.ll (100%)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/extract_subvector.ll b/llvm/test/CodeGen/AArch64/extract_subvector.ll
similarity index 100%
rename from llvm/test/CodeGen/AArch64/GlobalISel/extract_subvector.ll
rename to llvm/test/CodeGen/AArch64/extract_subvector.ll
>From 790155e454c4ae9db28b259f6064d17f7adb3190 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= <schuett at gmail.com>
Date: Sat, 2 Nov 2024 20:44:26 +0100
Subject: [PATCH 4/4] fix fixme
---
.../Target/AArch64/GISel/AArch64LegalizerInfo.cpp | 11 +++++++----
llvm/test/CodeGen/AArch64/extract_subvector.ll | 15 +++++++++++++++
2 files changed, 22 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 4a1f3555584fcb..581d57bb14bfeb 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -393,7 +393,9 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
{v8s16, p0, s128, 8},
{v2s32, p0, s64, 8},
{v4s32, p0, s128, 8},
- {v2s64, p0, s128, 8}})
+ {v2s64, p0, s128, 8},
+ // SVE vscale x 64 bit base sizes
+ {nxv4s16, p0, nxv4s16, 8}})
// These extends are also legal
.legalForTypesWithMemDesc(
{{s32, p0, s8, 8}, {s32, p0, s16, 8}, {s64, p0, s32, 8}})
@@ -1330,11 +1332,12 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder({G_SCMP, G_UCMP}).lower();
- // FIXME: {nxv2s16, nxv4s16}
getActionDefinitionsBuilder(G_EXTRACT_SUBVECTOR)
.legalFor({{v8s8, v16s8}, {v4s16, v8s16}, {v2s32, v4s32}})
- .legalFor(HasSVE,
- {{nxv2s16, nxv8s16}, {nxv4s16, nxv8s16}, {nxv2s32, nxv4s32}})
+ .legalFor(HasSVE, {{nxv2s16, nxv4s16},
+ {nxv2s16, nxv8s16},
+ {nxv4s16, nxv8s16},
+ {nxv2s32, nxv4s32}})
.widenScalarOrEltToNextPow2(0)
.immIdx(0); // Inform verifier imm idx 0 is handled.
diff --git a/llvm/test/CodeGen/AArch64/extract_subvector.ll b/llvm/test/CodeGen/AArch64/extract_subvector.ll
index d9032bd9f38c40..7d35e58923bfba 100644
--- a/llvm/test/CodeGen/AArch64/extract_subvector.ll
+++ b/llvm/test/CodeGen/AArch64/extract_subvector.ll
@@ -51,3 +51,18 @@ define void @extract_nxv2i16_nxv8i16(<vscale x 8 x i16> %arg, ptr %p) {
store <vscale x 2 x i16> %ext, ptr %p
ret void
}
+
+define void @extract_nxv2i16_nxv4i16(ptr %p, ptr %p2) {
+; CHECK-LABEL: extract_nxv2i16_nxv4i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: uunpklo z0.d, z0.s
+; CHECK-NEXT: st1h { z0.d }, p0, [x1]
+; CHECK-NEXT: ret
+ %vector = load <vscale x 4 x i16>, ptr %p
+ %ext = call <vscale x 2 x i16> @llvm.vector.extract.nxv2i16.nxv4i16(<vscale x 4 x i16> %vector, i64 0)
+ store <vscale x 2 x i16> %ext, ptr %p2
+ ret void
+}
More information about the llvm-commits
mailing list