[llvm] [GlobalISel][AArch64] Legalize G_EXTRACT_SUBVECTOR for SVE (PR #114519)

Thorsten Schütt via llvm-commits llvm-commits at lists.llvm.org
Sat Nov 2 13:29:43 PDT 2024


https://github.com/tschuett updated https://github.com/llvm/llvm-project/pull/114519

>From 8b77d3cd549133b6a92872d93386722d75cd2557 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= <schuett at gmail.com>
Date: Fri, 1 Nov 2024 09:10:04 +0100
Subject: [PATCH 1/4] [GlobalISel][AArch64] Legalize G_EXTRACT_SUBVECTOR for
 SVE

We use stores because returning values is not supported for smaller granules.

{nxv2s16, nxv4s16} fails with:

LLVM ERROR: cannot select: %0:zpr(<vscale x 4 x s16>) = G_TRUNC %2:fpr(<vscale x 4 x s32>) (in function: extract_nxv2i16_nxv4i16_1)
---
 .../GlobalISel/LegalizationArtifactCombiner.h |  3 +
 .../CodeGen/GlobalISel/LegalityPredicates.cpp |  2 +
 .../AArch64/GISel/AArch64LegalizerInfo.cpp    | 42 ++++++++++----
 .../AArch64/GlobalISel/extract_subvector.ll   | 55 +++++++++++++++++++
 4 files changed, 90 insertions(+), 12 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/extract_subvector.ll

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
index 471a7f70dd546c..a61943f29d18fb 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
@@ -267,6 +267,9 @@ class LegalizationArtifactCombiner {
     const LLT DstTy = MRI.getType(DstReg);
     Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());
 
+    if (DstTy.isScalableVector())
+      return false;
+
     // Try to fold trunc(g_constant) when the smaller constant type is legal.
     auto *SrcMI = MRI.getVRegDef(SrcReg);
     if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) {
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp b/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
index b7541effafe5ce..93e716a22814ca 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
@@ -196,6 +196,8 @@ LegalityPredicate LegalityPredicates::sameSize(unsigned TypeIdx0,
 
 LegalityPredicate LegalityPredicates::memSizeInBytesNotPow2(unsigned MMOIdx) {
   return [=](const LegalityQuery &Query) {
+    if (Query.MMODescrs[MMOIdx].MemoryTy.isScalableVector())
+      return true;
     return !llvm::has_single_bit<uint32_t>(
         Query.MMODescrs[MMOIdx].MemoryTy.getSizeInBytes());
   };
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index f162d1c2973cbc..4a1f3555584fcb 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -61,11 +61,19 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
   const LLT v2s64 = LLT::fixed_vector(2, 64);
   const LLT v2p0 = LLT::fixed_vector(2, p0);
 
+  // 128 bit
   const LLT nxv16s8 = LLT::scalable_vector(16, s8);
   const LLT nxv8s16 = LLT::scalable_vector(8, s16);
   const LLT nxv4s32 = LLT::scalable_vector(4, s32);
   const LLT nxv2s64 = LLT::scalable_vector(2, s64);
 
+  // 64 bit
+  const LLT nxv4s16 = LLT::scalable_vector(4, s16);
+  const LLT nxv2s32 = LLT::scalable_vector(2, s32);
+
+  // 32 bit
+  const LLT nxv2s16 = LLT::scalable_vector(2, s16);
+
   std::initializer_list<LLT> PackedVectorAllTypeList = {/* Begin 128bit types */
                                                         v16s8, v8s16, v4s32,
                                                         v2s64, v2p0,
@@ -442,16 +450,20 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
            {p0, p0, s64, 8},    {s128, p0, s128, 8},  {v16s8, p0, s128, 8},
            {v8s8, p0, s64, 8},  {v4s16, p0, s64, 8},  {v8s16, p0, s128, 8},
            {v2s32, p0, s64, 8}, {v4s32, p0, s128, 8}, {v2s64, p0, s128, 8}})
-      .legalForTypesWithMemDesc({
-          // SVE vscale x 128 bit base sizes
-          // TODO: Add nxv2p0. Consider bitcastIf.
-          //       See #92130
-          // https://github.com/llvm/llvm-project/pull/92130#discussion_r1616888461
-          {nxv16s8, p0, nxv16s8, 8},
-          {nxv8s16, p0, nxv8s16, 8},
-          {nxv4s32, p0, nxv4s32, 8},
-          {nxv2s64, p0, nxv2s64, 8},
-      })
+      .legalForTypesWithMemDesc(
+          {// SVE vscale x 128 bit base sizes
+           // TODO: Add nxv2p0. Consider bitcastIf.
+           //       See #92130
+           // https://github.com/llvm/llvm-project/pull/92130#discussion_r1616888461
+           {nxv16s8, p0, nxv16s8, 8},
+           {nxv8s16, p0, nxv8s16, 8},
+           {nxv4s32, p0, nxv4s32, 8},
+           {nxv2s64, p0, nxv2s64, 8},
+           // SVE vscale x 64 bit base sizes
+           {nxv2s32, p0, nxv2s32, 8},
+           {nxv4s16, p0, nxv4s16, 8},
+           // SVE vscale x 32 bit base sizes
+           {nxv2s16, p0, nxv2s16, 8}})
       .clampScalar(0, s8, s64)
       .lowerIf([=](const LegalityQuery &Query) {
         return Query.Types[0].isScalar() &&
@@ -639,17 +651,20 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
 
   getActionDefinitionsBuilder(G_TRUNC)
       .legalFor({{v2s32, v2s64}, {v4s16, v4s32}, {v8s8, v8s16}})
+      .legalFor(HasSVE, {{nxv4s16, nxv4s32}})
       .moreElementsToNextPow2(0)
       .clampMaxNumElements(0, s8, 8)
       .clampMaxNumElements(0, s16, 4)
       .clampMaxNumElements(0, s32, 2)
       .minScalarOrEltIf(
-          [=](const LegalityQuery &Query) { return Query.Types[0].isVector(); },
+          [=](const LegalityQuery &Query) {
+            return Query.Types[0].isFixedVector();
+          },
           0, s8)
       .lowerIf([=](const LegalityQuery &Query) {
         LLT DstTy = Query.Types[0];
         LLT SrcTy = Query.Types[1];
-        return DstTy.isVector() && SrcTy.getSizeInBits() > 128 &&
+        return DstTy.isFixedVector() && SrcTy.getSizeInBits() > 128 &&
                DstTy.getScalarSizeInBits() * 2 <= SrcTy.getScalarSizeInBits();
       })
       .clampMinNumElements(0, s8, 8)
@@ -1315,8 +1330,11 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
 
   getActionDefinitionsBuilder({G_SCMP, G_UCMP}).lower();
 
+  // FIXME: {nxv2s16, nxv4s16}
   getActionDefinitionsBuilder(G_EXTRACT_SUBVECTOR)
       .legalFor({{v8s8, v16s8}, {v4s16, v8s16}, {v2s32, v4s32}})
+      .legalFor(HasSVE,
+                {{nxv2s16, nxv8s16}, {nxv4s16, nxv8s16}, {nxv2s32, nxv4s32}})
       .widenScalarOrEltToNextPow2(0)
       .immIdx(0); // Inform verifier imm idx 0 is handled.
 
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/extract_subvector.ll b/llvm/test/CodeGen/AArch64/GlobalISel/extract_subvector.ll
new file mode 100644
index 00000000000000..ab302071b815a3
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/extract_subvector.ll
@@ -0,0 +1,55 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple aarch64 -mattr=+sve  | FileCheck %s
+; RUN: llc < %s -mtriple aarch64 -mattr=+sve -global-isel -aarch64-enable-gisel-sve=1 | FileCheck %s
+
+;; RUN: llc -global-isel -mtriple=aarch64-linux-gnu  -mattr=+sve -O0  -aarch64-enable-gisel-sve=1 -stop-after=irtranslator < %s | FileCheck %s
+
+define void @extract_nxv2i32_nxv4i32_1(<vscale x 4 x i32> %arg, ptr %p) {
+; CHECK-LABEL: extract_nxv2i32_nxv4i32_1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpklo z0.d, z0.s
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    st1w { z0.d }, p0, [x0]
+; CHECK-NEXT:    ret
+  %ext = call <vscale x 2 x i32> @llvm.vector.extract.nxv2i32.nxv4i32(<vscale x 4 x i32> %arg, i64 0)
+  store <vscale x 2 x i32> %ext, ptr %p
+  ret void
+}
+
+define void @extract_nxv4i16_nxv8i16_1(<vscale x 8 x i16> %arg, ptr %p) {
+; CHECK-LABEL: extract_nxv4i16_nxv8i16_1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    st1h { z0.s }, p0, [x0]
+; CHECK-NEXT:    ret
+  %ext = call <vscale x 4 x i16> @llvm.vector.extract.nxv4i16.nxv8i16(<vscale x 8 x i16> %arg, i64 0)
+  store <vscale x 4 x i16> %ext, ptr %p
+  ret void
+}
+
+define void @extract_nxv2i16_nxv8i16_1(<vscale x 8 x i16> %arg, ptr %p) {
+; CHECK-LABEL: extract_nxv2i16_nxv8i16_1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    uunpkhi z0.d, z0.s
+; CHECK-NEXT:    st1h { z0.d }, p0, [x0]
+; CHECK-NEXT:    ret
+  %ext = call <vscale x 2 x i16> @llvm.vector.extract.nxv2i16.nxv8i16(<vscale x 8 x i16> %arg, i64 2)
+  store <vscale x 2 x i16> %ext, ptr %p
+  ret void
+}
+
+define void @extract_nxv2i16_nxv8i16(<vscale x 8 x i16> %arg, ptr %p) {
+; CHECK-LABEL: extract_nxv2i16_nxv8i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    uunpklo z0.d, z0.s
+; CHECK-NEXT:    st1h { z0.d }, p0, [x0]
+; CHECK-NEXT:    ret
+  %ext = call <vscale x 2 x i16> @llvm.vector.extract.nxv2i16.nxv8i16(<vscale x 8 x i16> %arg, i64 0)
+  store <vscale x 2 x i16> %ext, ptr %p
+  ret void
+}

>From 823682bb7bf8893250bb86d8e5f127378e6285e3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= <schuett at gmail.com>
Date: Fri, 1 Nov 2024 10:05:01 +0100
Subject: [PATCH 2/4] cleanup test

---
 .../AArch64/GlobalISel/extract_subvector.ll        | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/extract_subvector.ll b/llvm/test/CodeGen/AArch64/GlobalISel/extract_subvector.ll
index ab302071b815a3..d9032bd9f38c40 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/extract_subvector.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/extract_subvector.ll
@@ -2,10 +2,8 @@
 ; RUN: llc < %s -mtriple aarch64 -mattr=+sve  | FileCheck %s
 ; RUN: llc < %s -mtriple aarch64 -mattr=+sve -global-isel -aarch64-enable-gisel-sve=1 | FileCheck %s
 
-;; RUN: llc -global-isel -mtriple=aarch64-linux-gnu  -mattr=+sve -O0  -aarch64-enable-gisel-sve=1 -stop-after=irtranslator < %s | FileCheck %s
-
-define void @extract_nxv2i32_nxv4i32_1(<vscale x 4 x i32> %arg, ptr %p) {
-; CHECK-LABEL: extract_nxv2i32_nxv4i32_1:
+define void @extract_nxv2i32_nxv4i32(<vscale x 4 x i32> %arg, ptr %p) {
+; CHECK-LABEL: extract_nxv2i32_nxv4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    uunpklo z0.d, z0.s
 ; CHECK-NEXT:    ptrue p0.d
@@ -16,8 +14,8 @@ define void @extract_nxv2i32_nxv4i32_1(<vscale x 4 x i32> %arg, ptr %p) {
   ret void
 }
 
-define void @extract_nxv4i16_nxv8i16_1(<vscale x 8 x i16> %arg, ptr %p) {
-; CHECK-LABEL: extract_nxv4i16_nxv8i16_1:
+define void @extract_nxv4i16_nxv8i16(<vscale x 8 x i16> %arg, ptr %p) {
+; CHECK-LABEL: extract_nxv4i16_nxv8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
 ; CHECK-NEXT:    ptrue p0.s
@@ -28,8 +26,8 @@ define void @extract_nxv4i16_nxv8i16_1(<vscale x 8 x i16> %arg, ptr %p) {
   ret void
 }
 
-define void @extract_nxv2i16_nxv8i16_1(<vscale x 8 x i16> %arg, ptr %p) {
-; CHECK-LABEL: extract_nxv2i16_nxv8i16_1:
+define void @extract_nxv2i16_nxv8i16_2(<vscale x 8 x i16> %arg, ptr %p) {
+; CHECK-LABEL: extract_nxv2i16_nxv8i16_2:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
 ; CHECK-NEXT:    ptrue p0.d

>From a37d2d85725179e452dd60efc562672feedd2286 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= <schuett at gmail.com>
Date: Fri, 1 Nov 2024 16:57:45 +0100
Subject: [PATCH 3/4] address review comment

---
 llvm/test/CodeGen/AArch64/{GlobalISel => }/extract_subvector.ll | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename llvm/test/CodeGen/AArch64/{GlobalISel => }/extract_subvector.ll (100%)

diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/extract_subvector.ll b/llvm/test/CodeGen/AArch64/extract_subvector.ll
similarity index 100%
rename from llvm/test/CodeGen/AArch64/GlobalISel/extract_subvector.ll
rename to llvm/test/CodeGen/AArch64/extract_subvector.ll

>From 75682b2fc4a1ed946c79e519c3036e72716acf63 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= <schuett at gmail.com>
Date: Sat, 2 Nov 2024 20:44:26 +0100
Subject: [PATCH 4/4] fix fixme

---
 .../Target/AArch64/GISel/AArch64LegalizerInfo.cpp | 11 +++++++----
 llvm/test/CodeGen/AArch64/extract_subvector.ll    | 15 +++++++++++++++
 2 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 4a1f3555584fcb..581d57bb14bfeb 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -393,7 +393,9 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
                                  {v8s16, p0, s128, 8},
                                  {v2s32, p0, s64, 8},
                                  {v4s32, p0, s128, 8},
-                                 {v2s64, p0, s128, 8}})
+                                 {v2s64, p0, s128, 8},
+                                 // SVE vscale x 64 bit base sizes
+                                 {nxv4s16, p0, nxv4s16, 8}})
       // These extends are also legal
       .legalForTypesWithMemDesc(
           {{s32, p0, s8, 8}, {s32, p0, s16, 8}, {s64, p0, s32, 8}})
@@ -1330,11 +1332,12 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
 
   getActionDefinitionsBuilder({G_SCMP, G_UCMP}).lower();
 
-  // FIXME: {nxv2s16, nxv4s16}
   getActionDefinitionsBuilder(G_EXTRACT_SUBVECTOR)
       .legalFor({{v8s8, v16s8}, {v4s16, v8s16}, {v2s32, v4s32}})
-      .legalFor(HasSVE,
-                {{nxv2s16, nxv8s16}, {nxv4s16, nxv8s16}, {nxv2s32, nxv4s32}})
+      .legalFor(HasSVE, {{nxv2s16, nxv4s16},
+                         {nxv2s16, nxv8s16},
+                         {nxv4s16, nxv8s16},
+                         {nxv2s32, nxv4s32}})
       .widenScalarOrEltToNextPow2(0)
       .immIdx(0); // Inform verifier imm idx 0 is handled.
 
diff --git a/llvm/test/CodeGen/AArch64/extract_subvector.ll b/llvm/test/CodeGen/AArch64/extract_subvector.ll
index d9032bd9f38c40..7d35e58923bfba 100644
--- a/llvm/test/CodeGen/AArch64/extract_subvector.ll
+++ b/llvm/test/CodeGen/AArch64/extract_subvector.ll
@@ -51,3 +51,18 @@ define void @extract_nxv2i16_nxv8i16(<vscale x 8 x i16> %arg, ptr %p) {
   store <vscale x 2 x i16> %ext, ptr %p
   ret void
 }
+
+define void @extract_nxv2i16_nxv4i16(ptr %p, ptr %p2) {
+; CHECK-LABEL: extract_nxv2i16_nxv4i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0]
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    uunpklo z0.d, z0.s
+; CHECK-NEXT:    st1h { z0.d }, p0, [x1]
+; CHECK-NEXT:    ret
+  %vector = load <vscale x 4 x i16>, ptr %p
+  %ext = call <vscale x 2 x i16> @llvm.vector.extract.nxv2i16.nxv4i16(<vscale x 4 x i16> %vector, i64 0)
+  store <vscale x 2 x i16> %ext, ptr %p2
+  ret void
+}



More information about the llvm-commits mailing list