[llvm] 08fd44b - [AArch64] Force streaming-compatible codegen when attributes are set.
Sander de Smalen via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 18 03:26:42 PDT 2023
Author: Sander de Smalen
Date: 2023-07-18T10:26:00Z
New Revision: 08fd44b30063f7a896e76be7f47b79622dba1427
URL: https://github.com/llvm/llvm-project/commit/08fd44b30063f7a896e76be7f47b79622dba1427
DIFF: https://github.com/llvm/llvm-project/commit/08fd44b30063f7a896e76be7f47b79622dba1427.diff
LOG: [AArch64] Force streaming-compatible codegen when attributes are set.
Before this patch, the only way to generate streaming-compatible code
was to use the `-force-streaming-compatible-sve` flag, but the compiler
should also avoid the use of instructions invalid in streaming mode
when a function has the aarch64_pstate_sm_enabled/compatible attribute.
Reviewed By: paulwalker-arm, david-arm
Differential Revision: https://reviews.llvm.org/D155428
Added:
Modified:
llvm/lib/Target/AArch64/AArch64Subtarget.cpp
llvm/lib/Target/AArch64/AArch64Subtarget.h
llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll
llvm/test/Transforms/LoopVectorize/AArch64/streaming-compatible-sve-no-maximize-bandwidth.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index d27dddefcfdd4a..450e27b8a2af08 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -292,13 +292,15 @@ AArch64Subtarget::AArch64Subtarget(const Triple &TT, StringRef CPU,
const TargetMachine &TM, bool LittleEndian,
unsigned MinSVEVectorSizeInBitsOverride,
unsigned MaxSVEVectorSizeInBitsOverride,
- bool StreamingSVEModeDisabled)
+ bool StreamingSVEMode,
+ bool StreamingCompatibleSVEMode)
: AArch64GenSubtargetInfo(TT, CPU, TuneCPU, FS),
ReserveXRegister(AArch64::GPR64commonRegClass.getNumRegs()),
ReserveXRegisterForRA(AArch64::GPR64commonRegClass.getNumRegs()),
CustomCallSavedXRegs(AArch64::GPR64commonRegClass.getNumRegs()),
IsLittle(LittleEndian),
- StreamingSVEModeDisabled(StreamingSVEModeDisabled),
+ StreamingSVEMode(StreamingSVEMode),
+ StreamingCompatibleSVEMode(StreamingCompatibleSVEMode),
MinSVEVectorSizeInBits(MinSVEVectorSizeInBitsOverride),
MaxSVEVectorSizeInBits(MaxSVEVectorSizeInBitsOverride), TargetTriple(TT),
InstrInfo(initializeSubtargetDependencies(FS, CPU, TuneCPU)),
@@ -479,5 +481,10 @@ bool AArch64Subtarget::isNeonAvailable() const {
if (!hasNEON())
return false;
- return !ForceStreamingCompatibleSVE;
+ // The 'force-streaming-compatible-sve' flag overrides the streaming
+ // function attributes.
+ if (ForceStreamingCompatibleSVE.getNumOccurrences() > 0)
+ return !ForceStreamingCompatibleSVE;
+
+ return !isStreaming() && !isStreamingCompatible();
}
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index d4c136d69272b4..9ab86684856eca 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -124,7 +124,8 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
bool IsLittle;
- bool StreamingSVEModeDisabled;
+ bool StreamingSVEMode;
+ bool StreamingCompatibleSVEMode;
unsigned MinSVEVectorSizeInBits;
unsigned MaxSVEVectorSizeInBits;
unsigned VScaleForTuning = 2;
@@ -163,7 +164,8 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
StringRef FS, const TargetMachine &TM, bool LittleEndian,
unsigned MinSVEVectorSizeInBitsOverride = 0,
unsigned MaxSVEVectorSizeInBitsOverride = 0,
- bool StreamingSVEModeDisabled = true);
+ bool StreamingSVEMode = false,
+ bool StreamingCompatibleSVEMode = false);
// Getters for SubtargetFeatures defined in tablegen
#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \
@@ -202,6 +204,12 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
bool isXRaySupported() const override { return true; }
+ /// Returns true if the function has the streaming attribute.
+ bool isStreaming() const { return StreamingSVEMode; }
+
+ /// Returns true if the function has the streaming-compatible attribute.
+ bool isStreamingCompatible() const { return StreamingCompatibleSVEMode; }
+
/// Returns true if the target has NEON and the function at runtime is known
/// to have NEON enabled (e.g. the function is known not to be in streaming-SVE
/// mode, which disables NEON instructions).
@@ -209,7 +217,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
unsigned getMinVectorRegisterBitWidth() const {
// Don't assume any minimum vector size when PSTATE.SM may not be 0.
- if (!isStreamingSVEModeDisabled())
+ if (StreamingSVEMode || StreamingCompatibleSVEMode)
return 0;
return MinVectorRegisterBitWidth;
}
@@ -416,8 +424,6 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
return "__security_check_cookie_arm64ec";
return "__security_check_cookie";
}
-
- bool isStreamingSVEModeDisabled() const { return StreamingSVEModeDisabled; }
};
} // End llvm namespace
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index c7a6bb563ce668..0c017bce8aaa27 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -391,10 +391,10 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
StringRef TuneCPU = TuneAttr.isValid() ? TuneAttr.getValueAsString() : CPU;
StringRef FS = FSAttr.isValid() ? FSAttr.getValueAsString() : TargetFS;
- bool StreamingSVEModeDisabled =
- !F.hasFnAttribute("aarch64_pstate_sm_enabled") &&
- !F.hasFnAttribute("aarch64_pstate_sm_compatible") &&
- !F.hasFnAttribute("aarch64_pstate_sm_body");
+ bool StreamingSVEMode = F.hasFnAttribute("aarch64_pstate_sm_enabled") ||
+ F.hasFnAttribute("aarch64_pstate_sm_body");
+ bool StreamingCompatibleSVEMode =
+ F.hasFnAttribute("aarch64_pstate_sm_compatible");
unsigned MinSVEVectorSize = 0;
unsigned MaxSVEVectorSize = 0;
@@ -427,8 +427,11 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
SmallString<512> Key;
raw_svector_ostream(Key) << "SVEMin" << MinSVEVectorSize << "SVEMax"
- << MaxSVEVectorSize << "StreamingSVEModeDisabled="
- << StreamingSVEModeDisabled << CPU << TuneCPU << FS;
+ << MaxSVEVectorSize
+ << "StreamingSVEMode=" << StreamingSVEMode
+ << "StreamingCompatibleSVEMode="
+ << StreamingCompatibleSVEMode << CPU << TuneCPU
+ << FS;
auto &I = SubtargetMap[Key];
if (!I) {
@@ -438,8 +441,14 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
resetTargetOptions(F);
I = std::make_unique<AArch64Subtarget>(
TargetTriple, CPU, TuneCPU, FS, *this, isLittle, MinSVEVectorSize,
- MaxSVEVectorSize, StreamingSVEModeDisabled);
+ MaxSVEVectorSize, StreamingSVEMode, StreamingCompatibleSVEMode);
}
+
+ assert((!StreamingSVEMode || I->hasSME()) &&
+ "Expected SME to be available");
+ assert((!StreamingCompatibleSVEMode || I->hasSVEorSME()) &&
+ "Expected SVE or SME to be available");
+
return I.get();
}
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 72ac539775fe84..19fa856bf76e8b 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1925,8 +1925,7 @@ AArch64TTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
case TargetTransformInfo::RGK_Scalar:
return TypeSize::getFixed(64);
case TargetTransformInfo::RGK_FixedWidthVector:
- if (!ST->isStreamingSVEModeDisabled() &&
- !EnableFixedwidthAutovecInStreamingMode)
+ if (!ST->isNeonAvailable() && !EnableFixedwidthAutovecInStreamingMode)
return TypeSize::getFixed(0);
if (ST->hasSVE())
@@ -1935,7 +1934,8 @@ AArch64TTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
return TypeSize::getFixed(ST->hasNEON() ? 128 : 0);
case TargetTransformInfo::RGK_ScalableVector:
- if (!ST->isStreamingSVEModeDisabled() && !EnableScalableAutovecInStreamingMode)
+ if ((ST->isStreaming() || ST->isStreamingCompatible()) &&
+ !EnableScalableAutovecInStreamingMode)
return TypeSize::getScalable(0);
return TypeSize::getScalable(ST->hasSVE() ? 128 : 0);
diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll b/llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll
index ecf82f8c438bea..214a5ce38f276b 100644
--- a/llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll
+++ b/llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll
@@ -123,34 +123,42 @@ define void @streaming_compatible_caller_and_callee() "aarch64_pstate_sm_compati
define <2 x double> @streaming_compatible_with_neon_vectors(<2 x double> %arg) "aarch64_pstate_sm_compatible" nounwind #0 {
; CHECK-LABEL: streaming_compatible_with_neon_vectors:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #112
-; CHECK-NEXT: stp d15, d14, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT: stp d13, d12, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT: stp d11, d10, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill
-; CHECK-NEXT: stp x30, x19, [sp, #96] // 16-byte Folded Spill
-; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill
+; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-2
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT: str z0, [sp, #1, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: bl __arm_sme_state
; CHECK-NEXT: and x19, x0, #0x1
; CHECK-NEXT: tbz x19, #0, .LBB4_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstop sm
; CHECK-NEXT: .LBB4_2:
-; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldr z0, [sp, #1, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: bl normal_callee_vec_arg
-; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT: str z0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: tbz x19, #0, .LBB4_4
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB4_4:
-; CHECK-NEXT: ldp q1, q0, [sp] // 32-byte Folded Reload
-; CHECK-NEXT: ldp x30, x19, [sp, #96] // 16-byte Folded Reload
-; CHECK-NEXT: fadd v0.2d, v1.2d, v0.2d
-; CHECK-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT: ldp d11, d10, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT: ldp d13, d12, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: ldp d15, d14, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT: add sp, sp, #112
+; CHECK-NEXT: ldr z0, [sp, #1, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: ldr z1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: fadd z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT: addvl sp, sp, #2
+; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
; CHECK-NEXT: ret
%res = call <2 x double> @normal_callee_vec_arg(<2 x double> %arg)
%fadd = fadd <2 x double> %res, %arg
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/streaming-compatible-sve-no-maximize-bandwidth.ll b/llvm/test/Transforms/LoopVectorize/AArch64/streaming-compatible-sve-no-maximize-bandwidth.ll
index 473c516ab2d7ea..b89d09f258963c 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/streaming-compatible-sve-no-maximize-bandwidth.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/streaming-compatible-sve-no-maximize-bandwidth.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes=loop-vectorize -force-streaming-compatible-sve -mattr=+sve -force-target-instruction-cost=1 -scalable-vectorization=off -force-vector-interleave=1 -S 2>&1 | FileCheck %s --check-prefix=SC_SVE
+; RUN: opt < %s -passes=loop-vectorize -force-streaming-compatible-sve -enable-fixedwidth-autovec-in-streaming-mode -mattr=+sve -force-target-instruction-cost=1 -scalable-vectorization=off -force-vector-interleave=1 -S 2>&1 | FileCheck %s --check-prefix=SC_SVE
; RUN: opt < %s -passes=loop-vectorize -mattr=+sve -force-target-instruction-cost=1 -scalable-vectorization=off -force-vector-interleave=1 -S 2>&1 | FileCheck %s --check-prefix=NO_SC_SVE
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
More information about the llvm-commits
mailing list