[llvm] ec6af93 - [AArch64] NFC: Replace 'forceStreamingCompatibleSVE' with 'isNeonAvailable'.
Sander de Smalen via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 17 01:24:56 PDT 2023
Author: Sander de Smalen
Date: 2023-07-17T08:24:10Z
New Revision: ec6af93d0249d03a5babd547e072e4de3a2b5e48
URL: https://github.com/llvm/llvm-project/commit/ec6af93d0249d03a5babd547e072e4de3a2b5e48
DIFF: https://github.com/llvm/llvm-project/commit/ec6af93d0249d03a5babd547e072e4de3a2b5e48.diff
LOG: [AArch64] NFC: Replace 'forceStreamingCompatibleSVE' with 'isNeonAvailable'.
The AArch64Subtarget interface 'isNeonAvailable' is more appropriate going
forward, as we may also want to generate 'streaming SVE' code (not just
'streaming-compatible SVE' code); in either case we must still make sure not
to use NEON instructions, which are invalid in streaming SVE mode.
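For reference, the mechanical change at each call site is an inversion of the
predicate. A minimal sketch of the pattern, assembled from the hunks below
rather than taken verbatim from any single one:

    // Before: bail out of the NEON lowering when streaming-compatible SVE
    // code generation is being forced.
    if (Subtarget->forceStreamingCompatibleSVE())
      return LowerToScalableOp(Op, DAG);

    // After: bail out whenever NEON is unavailable, which covers the forced
    // streaming-compatible case as well as future streaming-SVE codegen.
    if (!Subtarget->isNeonAvailable())
      return LowerToScalableOp(Op, DAG);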
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
llvm/lib/Target/AArch64/AArch64InstrInfo.td
llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
llvm/lib/Target/AArch64/AArch64Subtarget.cpp
llvm/lib/Target/AArch64/AArch64Subtarget.h
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 8059592623713f..f730df454d01e0 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1508,7 +1508,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v2f64})
setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
- if (Subtarget->forceStreamingCompatibleSVE()) {
+ if (!Subtarget->isNeonAvailable()) {
setTruncStoreAction(MVT::v2f32, MVT::v2f16, Custom);
setTruncStoreAction(MVT::v4f32, MVT::v4f16, Custom);
setTruncStoreAction(MVT::v8f32, MVT::v8f16, Custom);
@@ -3623,7 +3623,7 @@ getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
SDValue AArch64TargetLowering::LowerXOR(SDValue Op, SelectionDAG &DAG) const {
if (useSVEForFixedLengthVectorVT(Op.getValueType(),
- Subtarget->forceStreamingCompatibleSVE()))
+ !Subtarget->isNeonAvailable()))
return LowerToScalableOp(Op, DAG);
SDValue Sel = Op.getOperand(0);
@@ -3833,8 +3833,7 @@ SDValue AArch64TargetLowering::LowerFP_EXTEND(SDValue Op,
if (VT.isScalableVector())
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FP_EXTEND_MERGE_PASSTHRU);
- if (useSVEForFixedLengthVectorVT(VT,
- Subtarget->forceStreamingCompatibleSVE()))
+ if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()))
return LowerFixedLengthFPExtendToSVE(Op, DAG);
assert(Op.getValueType() == MVT::f128 && "Unexpected lowering");
@@ -3850,8 +3849,7 @@ SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op,
SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
EVT SrcVT = SrcVal.getValueType();
- if (useSVEForFixedLengthVectorVT(SrcVT,
- Subtarget->forceStreamingCompatibleSVE()))
+ if (useSVEForFixedLengthVectorVT(SrcVT, !Subtarget->isNeonAvailable()))
return LowerFixedLengthFPRoundToSVE(Op, DAG);
if (SrcVT != MVT::f128) {
@@ -3882,10 +3880,8 @@ SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op,
return LowerToPredicatedOp(Op, DAG, Opcode);
}
- if (useSVEForFixedLengthVectorVT(VT,
- Subtarget->forceStreamingCompatibleSVE()) ||
- useSVEForFixedLengthVectorVT(InVT,
- Subtarget->forceStreamingCompatibleSVE()))
+ if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()) ||
+ useSVEForFixedLengthVectorVT(InVT, !Subtarget->isNeonAvailable()))
return LowerFixedLengthFPToIntToSVE(Op, DAG);
unsigned NumElts = InVT.getVectorNumElements();
@@ -4139,10 +4135,8 @@ SDValue AArch64TargetLowering::LowerVectorINT_TO_FP(SDValue Op,
return LowerToPredicatedOp(Op, DAG, Opcode);
}
- if (useSVEForFixedLengthVectorVT(VT,
- Subtarget->forceStreamingCompatibleSVE()) ||
- useSVEForFixedLengthVectorVT(InVT,
- Subtarget->forceStreamingCompatibleSVE()))
+ if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()) ||
+ useSVEForFixedLengthVectorVT(InVT, !Subtarget->isNeonAvailable()))
return LowerFixedLengthIntToFPToSVE(Op, DAG);
uint64_t VTSize = VT.getFixedSizeInBits();
@@ -4592,9 +4586,7 @@ static unsigned selectUmullSmull(SDNode *&N0, SDNode *&N1, SelectionDAG &DAG,
SDValue AArch64TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
- // If SVE is available then i64 vector multiplications can also be made legal.
- bool OverrideNEON = Subtarget->forceStreamingCompatibleSVE();
-
+ bool OverrideNEON = !Subtarget->isNeonAvailable();
if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT, OverrideNEON))
return LowerToPredicatedOp(Op, DAG, AArch64ISD::MUL_PRED);
@@ -6041,7 +6033,7 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
return LowerMLOAD(Op, DAG);
case ISD::LOAD:
if (useSVEForFixedLengthVectorVT(Op.getValueType(),
- Subtarget->forceStreamingCompatibleSVE()))
+ !Subtarget->isNeonAvailable()))
return LowerFixedLengthVectorLoadToSVE(Op, DAG);
return LowerLOAD(Op, DAG);
case ISD::ADD:
@@ -8541,7 +8533,7 @@ SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
getPackedSVEVectorVT(VT.getVectorElementType().changeTypeToInteger());
if (VT.isFixedLengthVector() &&
- useSVEForFixedLengthVectorVT(VT, Subtarget->forceStreamingCompatibleSVE())) {
+ useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable())) {
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
In1 = convertToScalableVector(DAG, ContainerVT, In1);
@@ -8674,8 +8666,7 @@ SDValue AArch64TargetLowering::LowerCTPOP_PARITY(SDValue Op,
assert(!IsParity && "ISD::PARITY of vector types not supported");
if (VT.isScalableVector() ||
- useSVEForFixedLengthVectorVT(VT,
- Subtarget->forceStreamingCompatibleSVE()))
+ useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()))
return LowerToPredicatedOp(Op, DAG, AArch64ISD::CTPOP_MERGE_PASSTHRU);
assert((VT == MVT::v1i64 || VT == MVT::v2i64 || VT == MVT::v2i32 ||
@@ -9289,8 +9280,7 @@ SDValue AArch64TargetLowering::LowerSELECT(SDValue Op,
return DAG.getNode(ISD::VSELECT, DL, Ty, SplatPred, TVal, FVal);
}
- if (useSVEForFixedLengthVectorVT(Ty,
- Subtarget->forceStreamingCompatibleSVE())) {
+ if (useSVEForFixedLengthVectorVT(Ty, !Subtarget->isNeonAvailable())) {
// FIXME: Ideally this would be the same as above using i1 types, however
// for the moment we can't deal with fixed i1 vector types properly, so
// instead extend the predicate to a result type sized integer vector.
@@ -11635,8 +11625,7 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
- if (useSVEForFixedLengthVectorVT(VT,
- Subtarget->forceStreamingCompatibleSVE()))
+ if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()))
return LowerFixedLengthVECTOR_SHUFFLEToSVE(Op, DAG);
// Convert shuffles that are directly supported on NEON to target-specific
@@ -11809,8 +11798,7 @@ SDValue AArch64TargetLowering::LowerSPLAT_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
- if (useSVEForFixedLengthVectorVT(VT,
- Subtarget->forceStreamingCompatibleSVE()))
+ if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()))
return LowerToScalableOp(Op, DAG);
assert(VT.isScalableVector() && VT.getVectorElementType() == MVT::i1 &&
@@ -11932,7 +11920,7 @@ static SDValue tryAdvSIMDModImm32(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
const SDValue *LHS = nullptr) {
EVT VT = Op.getValueType();
if (VT.isFixedLengthVector() &&
- DAG.getSubtarget<AArch64Subtarget>().forceStreamingCompatibleSVE())
+ !DAG.getSubtarget<AArch64Subtarget>().isNeonAvailable())
return SDValue();
if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
@@ -11985,7 +11973,7 @@ static SDValue tryAdvSIMDModImm16(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
const SDValue *LHS = nullptr) {
EVT VT = Op.getValueType();
if (VT.isFixedLengthVector() &&
- DAG.getSubtarget<AArch64Subtarget>().forceStreamingCompatibleSVE())
+ !DAG.getSubtarget<AArch64Subtarget>().isNeonAvailable())
return SDValue();
if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
@@ -12218,7 +12206,7 @@ static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG) {
SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op,
SelectionDAG &DAG) const {
if (useSVEForFixedLengthVectorVT(Op.getValueType(),
- Subtarget->forceStreamingCompatibleSVE()))
+ !Subtarget->isNeonAvailable()))
return LowerToScalableOp(Op, DAG);
// Attempt to form a vector S[LR]I from (or (and X, C1), (lsl Y, C2))
@@ -12338,8 +12326,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
- if (useSVEForFixedLengthVectorVT(VT,
- Subtarget->forceStreamingCompatibleSVE())) {
+ if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable())) {
if (auto SeqInfo = cast<BuildVectorSDNode>(Op)->isConstantSequence()) {
SDLoc DL(Op);
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
@@ -12785,7 +12772,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
SDValue AArch64TargetLowering::LowerCONCAT_VECTORS(SDValue Op,
SelectionDAG &DAG) const {
if (useSVEForFixedLengthVectorVT(Op.getValueType(),
- Subtarget->forceStreamingCompatibleSVE()))
+ !Subtarget->isNeonAvailable()))
return LowerFixedLengthConcatVectorsToSVE(Op, DAG);
assert(Op.getValueType().isScalableVector() &&
@@ -12824,7 +12811,7 @@ SDValue AArch64TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Unknown opcode!");
if (useSVEForFixedLengthVectorVT(Op.getValueType(),
- Subtarget->forceStreamingCompatibleSVE()))
+ !Subtarget->isNeonAvailable()))
return LowerFixedLengthInsertVectorElt(Op, DAG);
EVT VT = Op.getOperand(0).getValueType();
@@ -12872,8 +12859,7 @@ AArch64TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
return DAG.getAnyExtOrTrunc(Extract, DL, Op.getValueType());
}
- if (useSVEForFixedLengthVectorVT(VT,
- Subtarget->forceStreamingCompatibleSVE()))
+ if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()))
return LowerFixedLengthExtractVectorElt(Op, DAG);
// Check for non-constant or out of range lane.
@@ -12935,11 +12921,10 @@ SDValue AArch64TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
// If this is extracting the upper 64-bits of a 128-bit vector, we match
// that directly.
if (Size == 64 && Idx * InVT.getScalarSizeInBits() == 64 &&
- InVT.getSizeInBits() == 128 && !Subtarget->forceStreamingCompatibleSVE())
+ InVT.getSizeInBits() == 128 && Subtarget->isNeonAvailable())
return Op;
- if (useSVEForFixedLengthVectorVT(InVT,
- Subtarget->forceStreamingCompatibleSVE())) {
+ if (useSVEForFixedLengthVectorVT(InVT, !Subtarget->isNeonAvailable())) {
SDLoc DL(Op);
EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
@@ -13127,8 +13112,7 @@ SDValue AArch64TargetLowering::LowerDIV(SDValue Op, SelectionDAG &DAG) const {
bool AArch64TargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
// Currently no fixed length shuffles that require SVE are legal.
- if (useSVEForFixedLengthVectorVT(VT,
- Subtarget->forceStreamingCompatibleSVE()))
+ if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()))
return false;
if (VT.getVectorNumElements() == 4 &&
@@ -13221,7 +13205,7 @@ SDValue AArch64TargetLowering::LowerTRUNCATE(SDValue Op,
return SDValue();
if (useSVEForFixedLengthVectorVT(Op.getOperand(0).getValueType(),
- Subtarget->forceStreamingCompatibleSVE()))
+ !Subtarget->isNeonAvailable()))
return LowerFixedLengthVectorTruncateToSVE(Op, DAG);
return SDValue();
@@ -13240,8 +13224,7 @@ SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
switch (Op.getOpcode()) {
case ISD::SHL:
if (VT.isScalableVector() ||
- useSVEForFixedLengthVectorVT(VT,
- Subtarget->forceStreamingCompatibleSVE()))
+ useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()))
return LowerToPredicatedOp(Op, DAG, AArch64ISD::SHL_PRED);
if (isVShiftLImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize)
@@ -13254,8 +13237,7 @@ SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
case ISD::SRA:
case ISD::SRL:
if (VT.isScalableVector() ||
- useSVEForFixedLengthVectorVT(
- VT, Subtarget->forceStreamingCompatibleSVE())) {
+ useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable())) {
unsigned Opc = Op.getOpcode() == ISD::SRA ? AArch64ISD::SRA_PRED
: AArch64ISD::SRL_PRED;
return LowerToPredicatedOp(Op, DAG, Opc);
@@ -13393,7 +13375,7 @@ SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op,
return LowerToPredicatedOp(Op, DAG, AArch64ISD::SETCC_MERGE_ZERO);
if (useSVEForFixedLengthVectorVT(Op.getOperand(0).getValueType(),
- Subtarget->forceStreamingCompatibleSVE()))
+ !Subtarget->isNeonAvailable()))
return LowerFixedLengthVectorSetccToSVE(Op, DAG);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
@@ -13571,7 +13553,7 @@ SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op,
// Try to lower fixed length reductions to SVE.
EVT SrcVT = Src.getValueType();
- bool OverrideNEON = Subtarget->forceStreamingCompatibleSVE() ||
+ bool OverrideNEON = !Subtarget->isNeonAvailable() ||
Op.getOpcode() == ISD::VECREDUCE_AND ||
Op.getOpcode() == ISD::VECREDUCE_OR ||
Op.getOpcode() == ISD::VECREDUCE_XOR ||
@@ -14812,7 +14794,7 @@ bool AArch64TargetLowering::isLegalInterleavedAccessType(
}
unsigned VecSize = DL.getTypeSizeInBits(VecTy);
- if (Subtarget->forceStreamingCompatibleSVE() ||
+ if (!Subtarget->isNeonAvailable() ||
(Subtarget->useSVEForFixedLengthVectors() &&
(VecSize % Subtarget->getMinSVEVectorSizeInBits() == 0 ||
(VecSize < Subtarget->getMinSVEVectorSizeInBits() &&
@@ -16551,7 +16533,7 @@ static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG,
static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
- if (!Subtarget->hasNEON() || Subtarget->forceStreamingCompatibleSVE())
+ if (!Subtarget->isNeonAvailable())
return SDValue();
if (!N->getValueType(0).isSimple())
@@ -16764,8 +16746,7 @@ static SDValue tryCombineToBSL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
// It also doesn't work for streaming mode because it causes generating
// bsl instructions that are invalid in streaming mode.
if (TLI.useSVEForFixedLengthVectorVT(
- VT,
- DAG.getSubtarget<AArch64Subtarget>().forceStreamingCompatibleSVE()))
+ VT, !DAG.getSubtarget<AArch64Subtarget>().isNeonAvailable()))
return SDValue();
SDValue N0 = N->getOperand(0);
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index d49b82290ff3bd..bd03ffaafab108 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -3719,16 +3719,16 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
if (AArch64::FPR128RegClass.contains(DestReg) &&
AArch64::FPR128RegClass.contains(SrcReg)) {
- if (Subtarget.forceStreamingCompatibleSVE()) {
+ if (Subtarget.hasSVEorSME() && !Subtarget.isNeonAvailable())
BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ))
.addReg(AArch64::Z0 + (DestReg - AArch64::Q0), RegState::Define)
.addReg(AArch64::Z0 + (SrcReg - AArch64::Q0))
.addReg(AArch64::Z0 + (SrcReg - AArch64::Q0));
- } else if (Subtarget.hasNEON()) {
+ else if (Subtarget.hasNEON())
BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
.addReg(SrcReg)
.addReg(SrcReg, getKillRegState(KillSrc));
- } else {
+ else {
BuildMI(MBB, I, DL, get(AArch64::STRQpre))
.addReg(AArch64::SP, RegState::Define)
.addReg(SrcReg, getKillRegState(KillSrc))
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index a63a36de050ae9..2509edc6c82933 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -262,7 +262,7 @@ def UseNegativeImmediates
def UseScalarIncVL : Predicate<"Subtarget->useScalarIncVL()">;
-def NotInStreamingSVEMode : Predicate<"!Subtarget->forceStreamingCompatibleSVE()">;
+def IsNeonAvailable : Predicate<"Subtarget->isNeonAvailable()">;
def AArch64LocalRecover : SDNode<"ISD::LOCAL_RECOVER",
SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
@@ -7595,7 +7595,7 @@ def : Ld1Lane64IdxOpPat<extloadi8, VectorIndexH, v4i16, i32, LD1i8, VectorIndexH
// Same as above, but the first element is populated using
// scalar_to_vector + insert_subvector instead of insert_vector_elt.
-let Predicates = [NotInStreamingSVEMode] in {
+let Predicates = [IsNeonAvailable] in {
class Ld1Lane128FirstElm<ValueType ResultTy, ValueType VecTy,
SDPatternOperator ExtLoad, Instruction LD1>
: Pat<(ResultTy (scalar_to_vector (i32 (ExtLoad GPR64sp:$Rn)))),
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 280b68acf71072..0a2dbc817184f0 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -3159,7 +3159,7 @@ let Predicates = [HasSVEorSME] in {
(EXTRACT_SUBREG (DUP_ZZI_D ZPR:$vec, sve_elm_idx_extdup_d:$index), dsub)>;
// Extract element from vector with immediate index that's within the bottom 128-bits.
- let Predicates = [NotInStreamingSVEMode], AddedComplexity = 1 in {
+ let Predicates = [IsNeonAvailable], AddedComplexity = 1 in {
def : Pat<(i32 (vector_extract (nxv16i8 ZPR:$vec), VectorIndexB:$index)),
(i32 (UMOVvi8 (v16i8 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexB:$index))>;
def : Pat<(i32 (vector_extract (nxv8i16 ZPR:$vec), VectorIndexH:$index)),
@@ -3168,9 +3168,9 @@ let Predicates = [HasSVEorSME] in {
(i32 (UMOVvi32 (v4i32 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexS:$index))>;
def : Pat<(i64 (vector_extract (nxv2i64 ZPR:$vec), VectorIndexD:$index)),
(i64 (UMOVvi64 (v2i64 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexD:$index))>;
- } // End NotInStreamingSVEMode
+ } // End IsNeonAvailable
- let Predicates = [NotInStreamingSVEMode] in {
+ let Predicates = [IsNeonAvailable] in {
def : Pat<(sext_inreg (vector_extract (nxv16i8 ZPR:$vec), VectorIndexB:$index), i8),
(i32 (SMOVvi8to32 (v16i8 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexB:$index))>;
def : Pat<(sext_inreg (anyext (i32 (vector_extract (nxv16i8 ZPR:$vec), VectorIndexB:$index))), i8),
@@ -3183,7 +3183,7 @@ let Predicates = [HasSVEorSME] in {
def : Pat<(sext (i32 (vector_extract (nxv4i32 ZPR:$vec), VectorIndexS:$index))),
(i64 (SMOVvi32to64 (v4i32 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexS:$index))>;
- } // End NotInStreamingSVEMode
+ } // End IsNeonAvailable
// Extract first element from vector.
let AddedComplexity = 2 in {
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index 670f84d9ed5157..d27dddefcfdd4a 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -64,9 +64,11 @@ ReservedRegsForRA("reserve-regs-for-regalloc", cl::desc("Reserve physical "
"Should only be used for testing register allocator."),
cl::CommaSeparated, cl::Hidden);
-static cl::opt<bool>
- ForceStreamingCompatibleSVE("force-streaming-compatible-sve",
- cl::init(false), cl::Hidden);
+static cl::opt<bool> ForceStreamingCompatibleSVE(
+ "force-streaming-compatible-sve",
+ cl::desc(
+ "Force the use of streaming-compatible SVE code for all functions"),
+ cl::Hidden);
unsigned AArch64Subtarget::getVectorInsertExtractBaseCost() const {
if (OverrideVectorInsertExtractBaseCost.getNumOccurrences() > 0)
@@ -473,10 +475,9 @@ void AArch64Subtarget::mirFileLoaded(MachineFunction &MF) const {
bool AArch64Subtarget::useAA() const { return UseAA; }
-bool AArch64Subtarget::forceStreamingCompatibleSVE() const {
- if (ForceStreamingCompatibleSVE) {
- assert(hasSVEorSME() && "Expected SVE to be available");
- return hasSVEorSME();
- }
- return false;
+bool AArch64Subtarget::isNeonAvailable() const {
+ if (!hasNEON())
+ return false;
+
+ return !ForceStreamingCompatibleSVE;
}
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index 0357f809f56a55..d4c136d69272b4 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -202,6 +202,11 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
bool isXRaySupported() const override { return true; }
+ /// Returns true if the target has NEON and the function at runtime is known
+ /// to have NEON enabled (e.g. the function is known not to be in streaming-SVE
+ /// mode, which disables NEON instructions).
+ bool isNeonAvailable() const;
+
unsigned getMinVectorRegisterBitWidth() const {
// Don't assume any minimum vector size when PSTATE.SM may not be 0.
if (!isStreamingSVEModeDisabled())
@@ -380,7 +385,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
}
bool useSVEForFixedLengthVectors() const {
- if (forceStreamingCompatibleSVE())
+ if (!isNeonAvailable())
return true;
// Prefer NEON unless larger SVE registers are available.
@@ -391,11 +396,9 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
if (!useSVEForFixedLengthVectors() || !VT.isFixedLengthVector())
return false;
return VT.getFixedSizeInBits() > AArch64::SVEBitsPerBlock ||
- forceStreamingCompatibleSVE();
+ !isNeonAvailable();
}
- bool forceStreamingCompatibleSVE() const;
-
unsigned getVScaleForTuning() const { return VScaleForTuning; }
TailFoldingOpts getSVETailFoldingDefaultOpts() const {
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 7947ccf0a71b92..72ac539775fe84 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -216,7 +216,7 @@ bool AArch64TTIImpl::shouldMaximizeVectorBandwidth(
TargetTransformInfo::RegisterKind K) const {
assert(K != TargetTransformInfo::RGK_Scalar);
return (K == TargetTransformInfo::RGK_FixedWidthVector &&
- !ST->forceStreamingCompatibleSVE());
+ ST->isNeonAvailable());
}
/// Calculate the cost of materializing a 64-bit value. This helper
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index 7cb49126d09107..787cb3c5d34b59 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -267,7 +267,7 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
}
bool isLegalMaskedGatherScatter(Type *DataType) const {
- if (!ST->hasSVE() || ST->forceStreamingCompatibleSVE())
+ if (!ST->hasSVE() || !ST->isNeonAvailable())
return false;
// For fixed vectors, scalarize if not using SVE for them.
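The new behaviour can still be exercised from the command line through the
cl::opt retained above; a hypothetical invocation (the exact RUN lines used by
the in-tree streaming-compatible tests may differ):

    # Force streaming-compatible SVE codegen, so isNeonAvailable() returns
    # false and the fixed-length SVE lowerings are taken instead of NEON.
    llc -mtriple=aarch64 -mattr=+sve -force-streaming-compatible-sve < input.ll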