[llvm] [TTI][TLI] Support scalable immediates with isLegalAddImmediate (PR #84173)

Thu Mar 14 06:38:03 PDT 2024

https://github.com/huntergr-arm updated https://github.com/llvm/llvm-project/pull/84173

>From c9326f25e3821dfb7cff343727c511f33d62b80f Mon Sep 17 00:00:00 2001
From: Graham Hunter <graham.hunter at arm.com>
Date: Wed, 13 Mar 2024 16:21:35 +0000
Subject: [PATCH 1/3] [TTI][TLI][AArch64] Add isLegalAddScalableImmediate

Adds an interface to determine whether an immediate would be legal within
an add instruction, when said immediate is multiplied by vscale.
---
 .../llvm/Analysis/TargetTransformInfo.h       | 10 +++
 .../llvm/Analysis/TargetTransformInfoImpl.h   |  2 +
 llvm/include/llvm/CodeGen/BasicTTIImpl.h      |  4 ++
 llvm/include/llvm/CodeGen/TargetLowering.h    |  6 ++
 llvm/lib/Analysis/TargetTransformInfo.cpp     |  4 ++
 llvm/unittests/Target/AArch64/CMakeLists.txt  |  1 +
 llvm/unittests/Target/AArch64/Immediates.cpp  | 62 +++++++++++++++++++
 7 files changed, 89 insertions(+)
 create mode 100644 llvm/unittests/Target/AArch64/Immediates.cpp

diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 4eab357f1b33b6..b15265661ff487 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -696,6 +696,12 @@ class TargetTransformInfo {
   /// immediate without having to materialize the immediate into a register.
   bool isLegalAddImmediate(int64_t Imm) const;
 
+  /// Return true if the specified immediate is legal add of a scalable
+  /// immediate, that is the target has add instructions which can add a
+  /// register with the immediate (multiplied by vscale) without having to
+  /// materialize the immediate into a register.
+  bool isLegalAddScalableImmediate(int64_t Imm) const;
+
   /// Return true if the specified immediate is legal icmp immediate,
   /// that is the target has icmp instructions which can compare a register
   /// against the immediate without having to materialize the immediate into a
@@ -1835,6 +1841,7 @@ class TargetTransformInfo::Concept {
       std::function<void(Instruction *, unsigned, APInt, APInt &)>
           SimplifyAndSetOp) = 0;
   virtual bool isLegalAddImmediate(int64_t Imm) = 0;
+  virtual bool isLegalAddScalableImmediate(int64_t Imm) = 0;
   virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
   virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
                                      int64_t BaseOffset, bool HasBaseReg,
@@ -2295,6 +2302,9 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
   bool isLegalAddImmediate(int64_t Imm) override {
     return Impl.isLegalAddImmediate(Imm);
   }
+  bool isLegalAddScalableImmediate(int64_t Imm) override {
+    return Impl.isLegalAddScalableImmediate(Imm);
+  }
   bool isLegalICmpImmediate(int64_t Imm) override {
     return Impl.isLegalICmpImmediate(Imm);
   }
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 7f661bb4a1df20..20b83771667fe0 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -216,6 +216,8 @@ class TargetTransformInfoImplBase {
 
   bool isLegalAddImmediate(int64_t Imm) const { return false; }
 
+  bool isLegalAddScalableImmediate(int64_t Imm) const { return false; }
+
   bool isLegalICmpImmediate(int64_t Imm) const { return false; }
 
   bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 61f6564e8cd79b..2ebd0168975c71 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -328,6 +328,10 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
     return getTLI()->isLegalAddImmediate(imm);
   }
 
+  bool isLegalAddScalableImmediate(int64_t Imm) {
+    return getTLI()->isLegalAddScalableImmediate(Imm);
+  }
+
   bool isLegalICmpImmediate(int64_t imm) {
     return getTLI()->isLegalICmpImmediate(imm);
   }
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 2f164a460db843..fa662ff98767ff 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -2770,6 +2770,12 @@ class TargetLoweringBase {
     return true;
   }
 
+  /// Return true if the specified immediate is legal add of a scalable
+  /// immediate, that is the target has add instructions which can add a
+  /// register with the immediate (multiplied by vscale) without having to
+  /// materialize the immediate into a register.
+  virtual bool isLegalAddScalableImmediate(int64_t) const { return false; }
+
   /// Return true if the specified immediate is legal for the value input of a
   /// store instruction.
   virtual bool isLegalStoreImmediate(int64_t Value) const {
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 15311be4dba277..85c4b5b6f075e6 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -395,6 +395,10 @@ bool TargetTransformInfo::isLegalAddImmediate(int64_t Imm) const {
   return TTIImpl->isLegalAddImmediate(Imm);
 }
 
+bool TargetTransformInfo::isLegalAddScalableImmediate(int64_t Imm) const {
+  return TTIImpl->isLegalAddScalableImmediate(Imm);
+}
+
 bool TargetTransformInfo::isLegalICmpImmediate(int64_t Imm) const {
   return TTIImpl->isLegalICmpImmediate(Imm);
 }
diff --git a/llvm/unittests/Target/AArch64/CMakeLists.txt b/llvm/unittests/Target/AArch64/CMakeLists.txt
index dacd919ba1e33b..64ab991ac479a4 100644
--- a/llvm/unittests/Target/AArch64/CMakeLists.txt
+++ b/llvm/unittests/Target/AArch64/CMakeLists.txt
@@ -29,6 +29,7 @@ add_llvm_target_unittest(AArch64Tests
   MatrixRegisterAliasing.cpp
   SMEAttributesTest.cpp
   AArch64SVESchedPseudoTest.cpp
+  Immediates.cpp
   )
 
 set_property(TARGET AArch64Tests PROPERTY FOLDER "Tests/UnitTests/TargetTests")
diff --git a/llvm/unittests/Target/AArch64/Immediates.cpp b/llvm/unittests/Target/AArch64/Immediates.cpp
new file mode 100644
index 00000000000000..27badd60eb494f
--- /dev/null
+++ b/llvm/unittests/Target/AArch64/Immediates.cpp
@@ -0,0 +1,62 @@
+#include "AArch64Subtarget.h"
+#include "AArch64TargetMachine.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/TargetSelect.h"
+
+#include "gtest/gtest.h"
+#include <initializer_list>
+#include <memory>
+
+using namespace llvm;
+
+namespace {
+
+struct TestCase {
+  int64_t Imm;
+  bool Result;
+};
+
+const std::initializer_list<TestCase> Tests = {
+    // ScalableImm, Result
+    // No change, easily 'supported'
+    {0, false},
+
+    // addvl increments by whole registers, range [-32,31]
+    // +(16 * vscale), one register's worth
+    {16, false},
+    // +(8 * vscale), half a register's worth
+    {8, false},
+    // -(32 * 16 * vscale)
+    {-512, false},
+    // -(33 * 16 * vscale)
+    {-528, false},
+    // +(31 * 16 * vscale)
+    {496, false},
+    // +(32 * 16 * vscale)
+    {512, false},
+};
+} // namespace
+
+TEST(Immediates, Immediates) {
+  LLVMInitializeAArch64TargetInfo();
+  LLVMInitializeAArch64Target();
+  LLVMInitializeAArch64TargetMC();
+
+  std::string Error;
+  auto TT = Triple::normalize("aarch64");
+  const Target *T = TargetRegistry::lookupTarget(TT, Error);
+
+  std::unique_ptr<TargetMachine> TM(T->createTargetMachine(
+      TT, "generic", "+sve", TargetOptions(), std::nullopt, std::nullopt,
+      CodeGenOptLevel::Default));
+  AArch64Subtarget ST(TM->getTargetTriple(), TM->getTargetCPU(),
+                      TM->getTargetCPU(), TM->getTargetFeatureString(), *TM,
+                      true);
+
+  auto *TLI = ST.getTargetLowering();
+
+  for (const auto &Test : Tests) {
+    ASSERT_EQ(TLI->isLegalAddScalableImmediate(Test.Imm), Test.Result);
+  }
+}

>From 4ef1e5342a901bbcff59aa527164dd9b3c93bf73 Mon Sep 17 00:00:00 2001
From: Graham Hunter <graham.hunter at arm.com>
Date: Wed, 6 Mar 2024 13:38:47 +0000
Subject: [PATCH 2/3] [AArch64] Recognize legal add immediates for addvl

---
 llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 15 +++++++++++++++
 llvm/lib/Target/AArch64/AArch64ISelLowering.h   |  1 +
 llvm/unittests/Target/AArch64/Immediates.cpp    |  8 ++++----
 3 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 9665ae5ceb903f..37a8242362a876 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -16595,6 +16595,21 @@ bool AArch64TargetLowering::isLegalAddImmediate(int64_t Immed) const {
   return IsLegal;
 }
 
+bool AArch64TargetLowering::isLegalAddScalableImmediate(int64_t Imm) const {
+  // Scalable immediates require SVE support.
+  if (!Subtarget->hasSVE())
+    return false;
+
+  // addvl's immediates are in terms of the number of bytes in a register.
+  // Since there are 16 in the base supported size (128bits), we need to
+  // divide the immediate by that much to give us a useful immediate to
+  // multiply by vscale. We can't have a remainder as a result of this.
+  if (Imm % 16 != 0)
+    return false;
+
+  return isInt<6>(Imm / 16);
+}
+
 // Return false to prevent folding
 // (mul (add x, c1), c2) -> (add (mul x, c2), c2*c1) in DAGCombine,
 // if the folding leads to worse code.
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 68341c199e0a2a..0b6f1b985e616f 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -689,6 +689,7 @@ class AArch64TargetLowering : public TargetLowering {
                                        StoreInst *SI) const override;
 
   bool isLegalAddImmediate(int64_t) const override;
+  bool isLegalAddScalableImmediate(int64_t) const override;
   bool isLegalICmpImmediate(int64_t) const override;
 
   bool isMulAddWithConstProfitable(SDValue AddNode,
diff --git a/llvm/unittests/Target/AArch64/Immediates.cpp b/llvm/unittests/Target/AArch64/Immediates.cpp
index 27badd60eb494f..a8e0dc661f8440 100644
--- a/llvm/unittests/Target/AArch64/Immediates.cpp
+++ b/llvm/unittests/Target/AArch64/Immediates.cpp
@@ -20,19 +20,19 @@ struct TestCase {
 const std::initializer_list<TestCase> Tests = {
     // ScalableImm, Result
     // No change, easily 'supported'
-    {0, false},
+    {0, true},
 
     // addvl increments by whole registers, range [-32,31]
     // +(16 * vscale), one register's worth
-    {16, false},
+    {16, true},
     // +(8 * vscale), half a register's worth
     {8, false},
     // -(32 * 16 * vscale)
-    {-512, false},
+    {-512, true},
     // -(33 * 16 * vscale)
     {-528, false},
     // +(31 * 16 * vscale)
-    {496, false},
+    {496, true},
     // +(32 * 16 * vscale)
     {512, false},
 };

>From f2704d2138d89141ca0636a45026881c2fe58609 Mon Sep 17 00:00:00 2001
From: Graham Hunter <graham.hunter at arm.com>
Date: Thu, 14 Mar 2024 12:05:30 +0000
Subject: [PATCH 3/3] Require SVE2, support inc/dec immediates, adjust doxygen
 comment

---
 .../llvm/Analysis/TargetTransformInfo.h       |  8 +--
 llvm/include/llvm/CodeGen/TargetLowering.h    |  8 +--
 .../Target/AArch64/AArch64ISelLowering.cpp    | 27 ++++++++--
 llvm/unittests/Target/AArch64/Immediates.cpp  | 50 +++++++++++++++++--
 4 files changed, 77 insertions(+), 16 deletions(-)

diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index b15265661ff487..584335e4c3c252 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -696,10 +696,10 @@ class TargetTransformInfo {
   /// immediate without having to materialize the immediate into a register.
   bool isLegalAddImmediate(int64_t Imm) const;
 
-  /// Return true if the specified immediate is legal add of a scalable
-  /// immediate, that is the target has add instructions which can add a
-  /// register with the immediate (multiplied by vscale) without having to
-  /// materialize the immediate into a register.
+  /// Return true if adding the specified scalable immediate is legal, that is
+  /// the target has add instructions which can add a register with the
+  /// immediate (multiplied by vscale) without having to materialize the
+  /// immediate into a register.
   bool isLegalAddScalableImmediate(int64_t Imm) const;
 
   /// Return true if the specified immediate is legal icmp immediate,
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index fa662ff98767ff..b660562479170f 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -2770,10 +2770,10 @@ class TargetLoweringBase {
     return true;
   }
 
-  /// Return true if the specified immediate is legal add of a scalable
-  /// immediate, that is the target has add instructions which can add a
-  /// register with the immediate (multiplied by vscale) without having to
-  /// materialize the immediate into a register.
+  /// Return true if adding the specified scalable immediate is legal, that is
+  /// the target has add instructions which can add a register with the
+  /// immediate (multiplied by vscale) without having to materialize the
+  /// immediate into a register.
   virtual bool isLegalAddScalableImmediate(int64_t) const { return false; }
 
   /// Return true if the specified immediate is legal for the value input of a
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 37a8242362a876..fb2a05f9e0338d 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -16596,18 +16596,35 @@ bool AArch64TargetLowering::isLegalAddImmediate(int64_t Immed) const {
 }
 
 bool AArch64TargetLowering::isLegalAddScalableImmediate(int64_t Imm) const {
-  // Scalable immediates require SVE support.
-  if (!Subtarget->hasSVE())
+  // We will only emit addvl/inc* instructions for SVE2.
+  if (!Subtarget->hasSVE2())
     return false;
 
   // addvl's immediates are in terms of the number of bytes in a register.
   // Since there are 16 in the base supported size (128bits), we need to
   // divide the immediate by that much to give us a useful immediate to
   // multiply by vscale. We can't have a remainder as a result of this.
-  if (Imm % 16 != 0)
-    return false;
+  if (Imm % 16 == 0)
+    return isInt<6>(Imm / 16);
+
+  // Inc[b|h|w|d] instructions take a pattern and a positive immediate
+  // multiplier. For now, assume a pattern of 'all'. Incb would be a subset
+  // of addvl as a result, so only take h|w|d into account.
+  // Dec[h|w|d] will cover subtractions.
+  // Immediates are in the range [1,16], so we can't do a 2's complement check.
+  // FIXME: Can we make use of other patterns to cover other immediates?
+
+  // inch|dech
+  if (Imm % 8 == 0)
+    return std::abs(Imm / 8) <= 16;
+  // incw|decw
+  if (Imm % 4 == 0)
+    return std::abs(Imm / 4) <= 16;
+  // incd|decd
+  if (Imm % 2 == 0)
+    return std::abs(Imm / 2) <= 16;
 
-  return isInt<6>(Imm / 16);
+  return false;
 }
 
 // Return false to prevent folding
diff --git a/llvm/unittests/Target/AArch64/Immediates.cpp b/llvm/unittests/Target/AArch64/Immediates.cpp
index a8e0dc661f8440..a4551b083b4407 100644
--- a/llvm/unittests/Target/AArch64/Immediates.cpp
+++ b/llvm/unittests/Target/AArch64/Immediates.cpp
@@ -25,8 +25,6 @@ const std::initializer_list<TestCase> Tests = {
     // addvl increments by whole registers, range [-32,31]
     // +(16 * vscale), one register's worth
     {16, true},
-    // +(8 * vscale), half a register's worth
-    {8, false},
     // -(32 * 16 * vscale)
     {-512, true},
     // -(33 * 16 * vscale)
@@ -35,6 +33,52 @@ const std::initializer_list<TestCase> Tests = {
     {496, true},
     // +(32 * 16 * vscale)
     {512, false},
+
+    // inc[h|w|d] increments by the number of 16/32/64bit elements in a
+    // register. mult_imm is in the range [1,16]
+    // +(mult_imm * num_elts * vscale)
+    // +(1 * 8 * vscale), 16 bit
+    {8, true},
+    // +(15 * 8 * vscale), 16 bit
+    {120, true},
+    // +(1 * 4 * vscale), 32 bit
+    {4, true},
+    // +(7 * 4 * vscale), 32 bit
+    {28, true},
+    // +(1 * 2 * vscale), 64 bit
+    {2, true},
+    // +(13 * 2 * vscale), 64 bit
+    {26, true},
+    // +(17 * 8 * vscale), 16 bit, out of range.
+    {136, false},
+    // +(19 * 2 * vscale), 64 bit, out of range.
+    {38, false},
+    // +(21 * 4 * vscale), 32 bit, out of range.
+    {84, false},
+
+    // dec[h|w|d] -- Same as above, but negative.
+    // -(mult_imm * num_elts * vscale)
+    // -(1 * 8 * vscale), 16 bit
+    {-8, true},
+    // -(15 * 8 * vscale), 16 bit
+    {-120, true},
+    // -(1 * 4 * vscale), 32 bit
+    {-4, true},
+    // -(7 * 4 * vscale), 32 bit
+    {-28, true},
+    // -(1 * 2 * vscale), 64 bit
+    {-2, true},
+    // -(13 * 2 * vscale), 64 bit
+    {-26, true},
+    // -(17 * 8 * vscale), 16 bit, out of range.
+    {-136, false},
+    // -(19 * 2 * vscale), 64 bit, out of range.
+    {-38, false},
+    // -(21 * 4 * vscale), 32 bit, out of range.
+    {-84, false},
+
+    // Invalid; not divisible by the above powers of 2.
+    {5, false},
 };
 } // namespace
 
@@ -48,7 +92,7 @@ TEST(Immediates, Immediates) {
   const Target *T = TargetRegistry::lookupTarget(TT, Error);
 
   std::unique_ptr<TargetMachine> TM(T->createTargetMachine(
-      TT, "generic", "+sve", TargetOptions(), std::nullopt, std::nullopt,
+      TT, "generic", "+sve2", TargetOptions(), std::nullopt, std::nullopt,
       CodeGenOptLevel::Default));
   AArch64Subtarget ST(TM->getTargetTriple(), TM->getTargetCPU(),
                       TM->getTargetCPU(), TM->getTargetFeatureString(), *TM,