[llvm] 7e815dd - [AArch64][SME] Create new interface for isSVEAvailable.
Sander de Smalen via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 1 05:01:10 PDT 2023
Author: Sander de Smalen
Date: 2023-09-01T12:00:36Z
New Revision: 7e815dd76daa2e986d89101df56e05c06d88eb44
URL: https://github.com/llvm/llvm-project/commit/7e815dd76daa2e986d89101df56e05c06d88eb44
DIFF: https://github.com/llvm/llvm-project/commit/7e815dd76daa2e986d89101df56e05c06d88eb44.diff
LOG: [AArch64][SME] Create new interface for isSVEAvailable.
When a function is compiled to be in Streaming(-compatible) mode, the full
set of SVE instructions may not be available. This patch adds an interface
to query whether that is the case, and changes the codegen for FADDA (which
is not legal in Streaming-SVE mode) so that it is expanded for fixed-length
vectors and no longer code-generated for scalable vectors.
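In essence, the new query just combines the existing subtarget predicates.
A minimal standalone sketch of the semantics (illustrative names; the real
definitions are in the AArch64Subtarget changes below):

    // Sketch only: models the new predicate outside of LLVM. The full SVE
    // set (including FADDA) is only known to be usable when the function
    // cannot end up running with PSTATE.SM set.
    struct SubtargetModel {
      bool HasSVE = true;
      bool Streaming = false;            // function has a streaming body
      bool StreamingCompatible = false;  // ... or a streaming-compatible body
      bool isSVEAvailable() const {
        return HasSVE && !Streaming && !StreamingCompatible;
      }
    };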
Reviewed By: david-arm
Differential Revision: https://reviews.llvm.org/D156109
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/lib/Target/AArch64/AArch64Subtarget.cpp
llvm/lib/Target/AArch64/AArch64Subtarget.h
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
llvm/test/CodeGen/AArch64/sve-fp-reduce-fadda.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index b7bfc7563a23bb..da9eda6903eb71 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1459,7 +1459,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMAXIMUM, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMINIMUM, VT, Custom);
- setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
+ if (Subtarget->isSVEAvailable())
+ setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);
@@ -1519,9 +1520,12 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::MUL, MVT::v1i64, Custom);
setOperationAction(ISD::MUL, MVT::v2i64, Custom);
- // NEON doesn't support across-vector reductions, but SVE does.
- for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v2f64})
- setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
+ if (Subtarget->isSVEAvailable()) {
+ // NEON doesn't support across-vector reductions, but SVE does.
+ for (auto VT :
+ {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v2f64})
+ setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
+ }
if (!Subtarget->isNeonAvailable()) {
setTruncStoreAction(MVT::v2f32, MVT::v2f16, Custom);
@@ -1879,7 +1883,8 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT,
setOperationAction(ISD::VECREDUCE_FMAXIMUM, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMINIMUM, VT, Custom);
setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
- setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT,
+ StreamingSVE ? Expand : Custom);
setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index 450e27b8a2af08..8946c0b71e2ba4 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -477,14 +477,16 @@ void AArch64Subtarget::mirFileLoaded(MachineFunction &MF) const {
bool AArch64Subtarget::useAA() const { return UseAA; }
-bool AArch64Subtarget::isNeonAvailable() const {
- if (!hasNEON())
- return false;
+bool AArch64Subtarget::isStreamingCompatible() const {
+ return StreamingCompatibleSVEMode || ForceStreamingCompatibleSVE;
+}
- // The 'force-streaming-compatible-sve' flag overrides the streaming
- // function attributes.
- if (ForceStreamingCompatibleSVE.getNumOccurrences() > 0)
- return !ForceStreamingCompatibleSVE;
+bool AArch64Subtarget::isNeonAvailable() const {
+ return hasNEON() && !isStreaming() && !isStreamingCompatible();
+}
- return !isStreaming() && !isStreamingCompatible();
+bool AArch64Subtarget::isSVEAvailable() const {
+ // FIXME: Also return true if FEAT_FA64 is set, since full SVE is then
+ // usable even in streaming mode, but we can't do this yet as we don't
+ // yet support the feature in LLVM.
+ return hasSVE() && !isStreaming() && !isStreamingCompatible();
}
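For reference, the two queries now behave as follows for a target that has
both NEON and SVE (read off the definitions above; FEAT_FA64 is still a
FIXME):

    function mode            isNeonAvailable()   isSVEAvailable()
    normal                   true                true
    streaming                false               false
    streaming-compatible     false               false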
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index 5790d1a8ac815e..dc54da477eaa15 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -204,20 +204,28 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
bool isXRaySupported() const override { return true; }
- /// Returns true if the function has the streaming attribute.
+ /// Returns true if the function has a streaming body.
bool isStreaming() const { return StreamingSVEMode; }
- /// Returns true if the function has the streaming-compatible attribute.
- bool isStreamingCompatible() const { return StreamingCompatibleSVEMode; }
+ /// Returns true if the function has a streaming-compatible body.
+ bool isStreamingCompatible() const;
/// Returns true if the target has NEON and the function at runtime is known
/// to have NEON enabled (e.g. the function is known not to be in streaming-SVE
/// mode, which disables NEON instructions).
bool isNeonAvailable() const;
+ /// Returns true if the target has SVE and can use the full range of SVE
+ /// instructions, for example because the function is known not to be in
+ /// streaming-SVE mode or because the target has FEAT_FA64 enabled.
+ bool isSVEAvailable() const;
+
unsigned getMinVectorRegisterBitWidth() const {
- // Don't assume any minimum vector size when PSTATE.SM may not be 0.
- if (StreamingSVEMode || StreamingCompatibleSVEMode)
+ // Don't assume any minimum vector size when PSTATE.SM may not be 0, because
+ // we don't yet have streaming-compatible codegen that we trust is safe
+ // for functions that may be executed in streaming-SVE mode.
+ // By returning '0' here, we disable vectorization.
+ if (!isSVEAvailable() && !isNeonAvailable())
return 0;
return MinVectorRegisterBitWidth;
}
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index b7dfe2212f4280..404b5f170d061e 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1943,8 +1943,7 @@ AArch64TTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
return TypeSize::getFixed(ST->hasNEON() ? 128 : 0);
case TargetTransformInfo::RGK_ScalableVector:
- if ((ST->isStreaming() || ST->isStreamingCompatible()) &&
- !EnableScalableAutovecInStreamingMode)
+ if (!ST->isSVEAvailable() && !EnableScalableAutovecInStreamingMode)
return TypeSize::getScalable(0);
return TypeSize::getScalable(ST->hasSVE() ? 128 : 0);
diff --git a/llvm/test/CodeGen/AArch64/sve-fp-reduce-fadda.ll b/llvm/test/CodeGen/AArch64/sve-fp-reduce-fadda.ll
index 3235dd37e19e4b..460d8a8694bc4a 100644
--- a/llvm/test/CodeGen/AArch64/sve-fp-reduce-fadda.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fp-reduce-fadda.ll
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve < %s | FileCheck %s
-; FIXME: Streaming-compatible SVE doesn't include FADDA, so this shouldn't compile!
-; RUN: llc -mattr=+sve -force-streaming-compatible-sve < %s | FileCheck %s
+; Streaming-compatible SVE doesn't include FADDA, so this shouldn't compile!
+; RUN: not --crash llc -mattr=+sve -force-streaming-compatible-sve < %s
target triple = "aarch64-linux-gnu"
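The 'not --crash' RUN line checks that llc now fails on this file in
streaming-compatible mode: without FADDA there is no lowering for an
ordered reduction of a scalable vector. For fixed-length vectors the
operation is expanded instead. Since llvm.vector.reduce.fadd with a start
value is an ordered reduction, the expansion must accumulate lane by lane
rather than as a tree, which is the chain of scalar fadds visible in the
updated test below. In scalar terms (a sketch of the semantics, not the
actual expansion code):

    // Semantics of llvm.vector.reduce.fadd(Start, V) without 'reassoc':
    // strictly left-to-right accumulation.
    float seq_fadd(float Start, const float *Lanes, unsigned N) {
      float Acc = Start;
      for (unsigned I = 0; I != N; ++I)
        Acc += Lanes[I]; // order is fixed, so no pairwise/tree reduction
      return Acc;
    }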
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce.ll
index a94870815d42f3..d2d771c48c2044 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce.ll
@@ -10,11 +10,14 @@ target triple = "aarch64-unknown-linux-gnu"
define half @fadda_v4f16(half %start, <4 x half> %a) {
; CHECK-LABEL: fadda_v4f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT: fadda h0, p0, h0, z1.h
-; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
+; CHECK-NEXT: fadd h0, h0, h1
+; CHECK-NEXT: mov z2.h, z1.h[1]
+; CHECK-NEXT: fadd h0, h0, h2
+; CHECK-NEXT: mov z2.h, z1.h[2]
+; CHECK-NEXT: mov z1.h, z1.h[3]
+; CHECK-NEXT: fadd h0, h0, h2
+; CHECK-NEXT: fadd h0, h0, h1
; CHECK-NEXT: ret
%res = call half @llvm.vector.reduce.fadd.v4f16(half %start, <4 x half> %a)
ret half %res
@@ -23,11 +26,22 @@ define half @fadda_v4f16(half %start, <4 x half> %a) {
define half @fadda_v8f16(half %start, <8 x half> %a) {
; CHECK-LABEL: fadda_v8f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.h, vl8
-; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT: fadda h0, p0, h0, z1.h
-; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
+; CHECK-NEXT: fadd h0, h0, h1
+; CHECK-NEXT: mov z2.h, z1.h[1]
+; CHECK-NEXT: fadd h0, h0, h2
+; CHECK-NEXT: mov z2.h, z1.h[2]
+; CHECK-NEXT: fadd h0, h0, h2
+; CHECK-NEXT: mov z2.h, z1.h[3]
+; CHECK-NEXT: fadd h0, h0, h2
+; CHECK-NEXT: mov z2.h, z1.h[4]
+; CHECK-NEXT: fadd h0, h0, h2
+; CHECK-NEXT: mov z2.h, z1.h[5]
+; CHECK-NEXT: fadd h0, h0, h2
+; CHECK-NEXT: mov z2.h, z1.h[6]
+; CHECK-NEXT: mov z1.h, z1.h[7]
+; CHECK-NEXT: fadd h0, h0, h2
+; CHECK-NEXT: fadd h0, h0, h1
; CHECK-NEXT: ret
%res = call half @llvm.vector.reduce.fadd.v8f16(half %start, <8 x half> %a)
ret half %res
@@ -36,13 +50,38 @@ define half @fadda_v8f16(half %start, <8 x half> %a) {
define half @fadda_v16f16(half %start, ptr %a) {
; CHECK-LABEL: fadda_v16f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.h, vl8
; CHECK-NEXT: ldr q1, [x0]
-; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
-; CHECK-NEXT: fadda h0, p0, h0, z1.h
+; CHECK-NEXT: fadd h0, h0, h1
+; CHECK-NEXT: mov z2.h, z1.h[1]
+; CHECK-NEXT: fadd h0, h0, h2
+; CHECK-NEXT: mov z2.h, z1.h[2]
+; CHECK-NEXT: fadd h0, h0, h2
+; CHECK-NEXT: mov z2.h, z1.h[3]
+; CHECK-NEXT: fadd h0, h0, h2
+; CHECK-NEXT: mov z2.h, z1.h[4]
+; CHECK-NEXT: fadd h0, h0, h2
+; CHECK-NEXT: mov z2.h, z1.h[5]
+; CHECK-NEXT: fadd h0, h0, h2
+; CHECK-NEXT: mov z2.h, z1.h[6]
+; CHECK-NEXT: mov z1.h, z1.h[7]
+; CHECK-NEXT: fadd h0, h0, h2
+; CHECK-NEXT: fadd h0, h0, h1
; CHECK-NEXT: ldr q1, [x0, #16]
-; CHECK-NEXT: fadda h0, p0, h0, z1.h
-; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
+; CHECK-NEXT: mov z2.h, z1.h[1]
+; CHECK-NEXT: fadd h0, h0, h1
+; CHECK-NEXT: fadd h0, h0, h2
+; CHECK-NEXT: mov z2.h, z1.h[2]
+; CHECK-NEXT: fadd h0, h0, h2
+; CHECK-NEXT: mov z2.h, z1.h[3]
+; CHECK-NEXT: fadd h0, h0, h2
+; CHECK-NEXT: mov z2.h, z1.h[4]
+; CHECK-NEXT: fadd h0, h0, h2
+; CHECK-NEXT: mov z2.h, z1.h[5]
+; CHECK-NEXT: fadd h0, h0, h2
+; CHECK-NEXT: mov z2.h, z1.h[6]
+; CHECK-NEXT: mov z1.h, z1.h[7]
+; CHECK-NEXT: fadd h0, h0, h2
+; CHECK-NEXT: fadd h0, h0, h1
; CHECK-NEXT: ret
%op = load <16 x half>, ptr %a
%res = call half @llvm.vector.reduce.fadd.v16f16(half %start, <16 x half> %op)
@@ -52,11 +91,10 @@ define half @fadda_v16f16(half %start, ptr %a) {
define float @fadda_v2f32(float %start, <2 x float> %a) {
; CHECK-LABEL: fadda_v2f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.s, vl2
-; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT: fadda s0, p0, s0, z1.s
-; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
+; CHECK-NEXT: fadd s0, s0, s1
+; CHECK-NEXT: mov z1.s, z1.s[1]
+; CHECK-NEXT: fadd s0, s0, s1
; CHECK-NEXT: ret
%res = call float @llvm.vector.reduce.fadd.v2f32(float %start, <2 x float> %a)
ret float %res
@@ -65,11 +103,14 @@ define float @fadda_v2f32(float %start, <2 x float> %a) {
define float @fadda_v4f32(float %start, <4 x float> %a) {
; CHECK-LABEL: fadda_v4f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.s, vl4
-; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT: fadda s0, p0, s0, z1.s
-; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
+; CHECK-NEXT: fadd s0, s0, s1
+; CHECK-NEXT: mov z2.s, z1.s[1]
+; CHECK-NEXT: fadd s0, s0, s2
+; CHECK-NEXT: mov z2.s, z1.s[2]
+; CHECK-NEXT: mov z1.s, z1.s[3]
+; CHECK-NEXT: fadd s0, s0, s2
+; CHECK-NEXT: fadd s0, s0, s1
; CHECK-NEXT: ret
%res = call float @llvm.vector.reduce.fadd.v4f32(float %start, <4 x float> %a)
ret float %res
@@ -78,13 +119,22 @@ define float @fadda_v4f32(float %start, <4 x float> %a) {
define float @fadda_v8f32(float %start, ptr %a) {
; CHECK-LABEL: fadda_v8f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: ldr q1, [x0]
-; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
-; CHECK-NEXT: fadda s0, p0, s0, z1.s
+; CHECK-NEXT: fadd s0, s0, s1
+; CHECK-NEXT: mov z2.s, z1.s[1]
+; CHECK-NEXT: fadd s0, s0, s2
+; CHECK-NEXT: mov z2.s, z1.s[2]
+; CHECK-NEXT: mov z1.s, z1.s[3]
+; CHECK-NEXT: fadd s0, s0, s2
+; CHECK-NEXT: fadd s0, s0, s1
; CHECK-NEXT: ldr q1, [x0, #16]
-; CHECK-NEXT: fadda s0, p0, s0, z1.s
-; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
+; CHECK-NEXT: mov z2.s, z1.s[1]
+; CHECK-NEXT: fadd s0, s0, s1
+; CHECK-NEXT: fadd s0, s0, s2
+; CHECK-NEXT: mov z2.s, z1.s[2]
+; CHECK-NEXT: mov z1.s, z1.s[3]
+; CHECK-NEXT: fadd s0, s0, s2
+; CHECK-NEXT: fadd s0, s0, s1
; CHECK-NEXT: ret
%op = load <8 x float>, ptr %a
%res = call float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %op)
@@ -104,11 +154,10 @@ define double @fadda_v1f64(double %start, <1 x double> %a) {
define double @fadda_v2f64(double %start, <2 x double> %a) {
; CHECK-LABEL: fadda_v2f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.d, vl2
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT: fadda d0, p0, d0, z1.d
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT: fadd d0, d0, d1
+; CHECK-NEXT: mov z1.d, z1.d[1]
+; CHECK-NEXT: fadd d0, d0, d1
; CHECK-NEXT: ret
%res = call double @llvm.vector.reduce.fadd.v2f64(double %start, <2 x double> %a)
ret double %res
@@ -117,13 +166,14 @@ define double @fadda_v2f64(double %start, <2 x double> %a) {
define double @fadda_v4f64(double %start, ptr %a) {
; CHECK-LABEL: fadda_v4f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: ldr q1, [x0]
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: fadda d0, p0, d0, z1.d
+; CHECK-NEXT: fadd d0, d0, d1
+; CHECK-NEXT: mov z1.d, z1.d[1]
+; CHECK-NEXT: fadd d0, d0, d1
; CHECK-NEXT: ldr q1, [x0, #16]
-; CHECK-NEXT: fadda d0, p0, d0, z1.d
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT: fadd d0, d0, d1
+; CHECK-NEXT: mov z1.d, z1.d[1]
+; CHECK-NEXT: fadd d0, d0, d1
; CHECK-NEXT: ret
%op = load <4 x double>, ptr %a
%res = call double @llvm.vector.reduce.fadd.v4f64(double %start, <4 x double> %op)