[llvm] [LoongArch] Permit auto-vectorization using LSX/LASX with `auto-vec` feature (PR #78943)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Jan 21 22:14:34 PST 2024
https://github.com/wangleiat created https://github.com/llvm/llvm-project/pull/78943
With enough codegen complete, we can now correctly report the size of vector registers for LSX/LASX, allowing auto vectorization (The `auto-vec` feature needs to be enabled simultaneously).
As described, the `auto-vec` feature is an experimental one. It exists to ensure that automatic vectorization is not enabled by default, because the information provided by the current `TTI` cannot yield additional benefits for automatic vectorization.
>From 2306bbd6e26e2410ba79cd795ae053b2f76fa30c Mon Sep 17 00:00:00 2001
From: wanglei <wanglei at loongson.cn>
Date: Wed, 20 Dec 2023 15:56:31 +0800
Subject: [PATCH] [LoongArch] Permit auto-vectorization using LSX/LASX with
`auto-vec` feature
With enough codegen complete, we can now correctly report the size of
vector registers for LSX/LASX, allowing auto vectorization (The
"auto-vec" feature needs to be enabled simultaneously).
As described, the `auto-vec` feature is an experimental one. It exists to
ensure that automatic vectorization is not enabled by default, because the
information provided by the current `TTI` cannot yield additional
benefits for automatic vectorization.
---
llvm/lib/Target/LoongArch/LoongArch.td | 5 ++
.../lib/Target/LoongArch/LoongArchSubtarget.h | 2 +
.../LoongArchTargetTransformInfo.cpp | 18 +++++
.../LoongArch/LoongArchTargetTransformInfo.h | 2 +
.../LoopVectorize/LoongArch/defaults.ll | 65 +++++++++++++++++++
.../LoopVectorize/LoongArch/lit.local.cfg | 4 ++
6 files changed, 96 insertions(+)
create mode 100644 llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll
create mode 100644 llvm/test/Transforms/LoopVectorize/LoongArch/lit.local.cfg
diff --git a/llvm/lib/Target/LoongArch/LoongArch.td b/llvm/lib/Target/LoongArch/LoongArch.td
index 75b65fe69f26291..4cffaf573b918ab 100644
--- a/llvm/lib/Target/LoongArch/LoongArch.td
+++ b/llvm/lib/Target/LoongArch/LoongArch.td
@@ -106,6 +106,11 @@ def FeatureRelax
: SubtargetFeature<"relax", "HasLinkerRelax", "true",
"Enable Linker relaxation">;
+// Experimental auto vectorization; opt in with -mattr=+auto-vec.
+def FeatureAutoVec
+ : SubtargetFeature<"auto-vec", "HasExpAutoVec", "true",
+ "Experimental auto vectorization">;
+
//===----------------------------------------------------------------------===//
// Registers, instruction descriptions ...
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h
index 5c173675cca4ccb..174e4cba8326334 100644
--- a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h
+++ b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h
@@ -44,6 +44,7 @@ class LoongArchSubtarget : public LoongArchGenSubtargetInfo {
bool HasLaLocalWithAbs = false;
bool HasUAL = false;
bool HasLinkerRelax = false;
+ bool HasExpAutoVec = false;
unsigned GRLen = 32;
MVT GRLenVT = MVT::i32;
LoongArchABI::ABI TargetABI = LoongArchABI::ABI_Unknown;
@@ -102,6 +103,7 @@ class LoongArchSubtarget : public LoongArchGenSubtargetInfo {
bool hasLaLocalWithAbs() const { return HasLaLocalWithAbs; }
bool hasUAL() const { return HasUAL; }
bool hasLinkerRelax() const { return HasLinkerRelax; }
+ bool hasExpAutoVec() const { return HasExpAutoVec; }
MVT getGRLenVT() const { return GRLenVT; }
unsigned getGRLen() const { return GRLen; }
LoongArchABI::ABI getTargetABI() const { return TargetABI; }
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
index a6de86eea1166e3..04349aa52b54089 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
@@ -19,4 +19,22 @@ using namespace llvm;
#define DEBUG_TYPE "loongarchtti"
+TypeSize LoongArchTTIImpl::getRegisterBitWidth(
+ TargetTransformInfo::RegisterKind K) const { // Register width in bits for K.
+ switch (K) {
+ case TargetTransformInfo::RGK_Scalar: // 64 on 64-bit subtargets, else 32.
+ return TypeSize::getFixed(ST->is64Bit() ? 64 : 32);
+ case TargetTransformInfo::RGK_FixedWidthVector: // Gated on the auto-vec feature.
+ if (ST->hasExtLASX() && ST->hasExpAutoVec()) // LASX is tested first: 256.
+ return TypeSize::getFixed(256);
+ if (ST->hasExtLSX() && ST->hasExpAutoVec()) // Otherwise LSX: 128.
+ return TypeSize::getFixed(128);
+ return TypeSize::getFixed(0); // No LSX/LASX, or auto-vec not enabled.
+ case TargetTransformInfo::RGK_ScalableVector:
+ return TypeSize::getScalable(0); // Always a zero scalable width here.
+ }
+
+ llvm_unreachable("Unsupported register kind"); // K matched no case above.
+}
+
// TODO: Implement more hooks to provide TTI machinery for LoongArch.
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
index 9e02f793ba8a91e..d296c9ed576fbd3 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
@@ -39,6 +39,8 @@ class LoongArchTTIImpl : public BasicTTIImplBase<LoongArchTTIImpl> {
: BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
TLI(ST->getTargetLowering()) {}
+ TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const;
+
// TODO: Implement more hooks to provide TTI machinery for LoongArch.
};
diff --git a/llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll b/llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll
new file mode 100644
index 000000000000000..d0a2b442fcbfd34
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll
@@ -0,0 +1,65 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -passes=loop-vectorize -mtriple loongarch64-linux-gnu -mattr=+lasx,+auto-vec -S 2>&1 | FileCheck %s
+
+;; This is a collection of tests whose only purpose is to show changes in the
+;; default configuration. Please keep these tests minimal - if you're testing
+;; functionality of some specific configuration, please place that in a
+;; separate test file with a hard coded configuration (even if that
+;; configuration is the current default).
+
+target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
+target triple = "loongarch64"
+
+define void @vector_add(ptr noalias nocapture %a, i64 %v) {
+; CHECK-LABEL: define void @vector_add(
+; CHECK-SAME: ptr noalias nocapture [[A:%.*]], i64 [[V:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[V]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
+; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[TMP2]], align 8
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
+; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT: [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
+; CHECK-NEXT: [[ADD:%.*]] = add i64 [[ELEM]], [[V]]
+; CHECK-NEXT: store i64 [[ADD]], ptr [[ARRAYIDX]], align 8
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK: for.end:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
+ %elem = load i64, ptr %arrayidx
+ %add = add i64 %elem, %v
+ store i64 %add, ptr %arrayidx
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, 1024
+ br i1 %exitcond.not, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
diff --git a/llvm/test/Transforms/LoopVectorize/LoongArch/lit.local.cfg b/llvm/test/Transforms/LoopVectorize/LoongArch/lit.local.cfg
new file mode 100644
index 000000000000000..9570af17fe5f1fc
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/LoongArch/lit.local.cfg
@@ -0,0 +1,4 @@
+config.suffixes = [".ll"]
+
+if not "LoongArch" in config.root.targets:
+ config.unsupported = True
More information about the llvm-commits
mailing list