[llvm] [AArch64] Add support for custom MOVI and MVN (PR #148698)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 14 11:47:09 PDT 2025
https://github.com/aabhinavg1 updated https://github.com/llvm/llvm-project/pull/148698
>From 2d11ccc7fb2c16d86f911e15ea539d3ed95293a9 Mon Sep 17 00:00:00 2001
From: aabhinavg1 <tiwariabhinavak at gmail.com>
Date: Mon, 14 Jul 2025 23:59:41 +0530
Subject: [PATCH] [AArch64] Add support for custom MOVI and MVN
- Implemented custom pattern matching for MOVI and MVNI vector instructions.
- Added tests to verify MOVI.16b, MOVI.4s, and MVNI.4s, with and without shift.
- Ensured correct codegen for specific immediate constants using AArch64 ISel lowering.
---
.../Target/AArch64/AArch64ISelLowering.cpp | 996 ++++++++++--------
llvm/test/CodeGen/AArch64/movi-custom.ll | 30 +
2 files changed, 567 insertions(+), 459 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/movi-custom.ll
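[Editor's note: the new test file movi-custom.ll is listed in the diffstat but its contents are not included in the quoted hunks. As a point of reference only, below is a minimal, hypothetical sketch of the kind of IR such a test might exercise; the function names and CHECK lines are illustrative assumptions based on the standard AArch64 MOVI/MVNI immediate encodings for these splat constants, not verbatim contents of this PR.]

; Hypothetical sketch, not the actual llvm/test/CodeGen/AArch64/movi-custom.ll.
; RUN: llc -mtriple=aarch64-none-elf -o - %s | FileCheck %s

; Byte splat of 42: conventionally materialized with a byte-wide MOVI.
; CHECK-LABEL: movi_16b:
; CHECK: movi v0.16b, #42
define <16 x i8> @movi_16b() {
  ret <16 x i8> <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42,
                 i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
}

; 32-bit splat of 0x0000AB00 (43776): MOVI with an LSL #8 shifted immediate.
; CHECK-LABEL: movi_4s_shift:
; CHECK: movi v0.4s, #171, lsl #8
define <4 x i32> @movi_4s_shift() {
  ret <4 x i32> <i32 43776, i32 43776, i32 43776, i32 43776>
}

; 32-bit splat of 0xFFFFFFEF (-17 = ~0x10): MVNI with no shift.
; CHECK-LABEL: mvni_4s:
; CHECK: mvni v0.4s, #16
define <4 x i32> @mvni_4s() {
  ret <4 x i32> <i32 -17, i32 -17, i32 -17, i32 -17>
}

; 32-bit splat of 0xFFFFEFFF (-4097 = ~(0x10 << 8)): MVNI with LSL #8.
; CHECK-LABEL: mvni_4s_shift:
; CHECK: mvni v0.4s, #16, lsl #8
define <4 x i32> @mvni_4s_shift() {
  ret <4 x i32> <i32 -4097, i32 -4097, i32 -4097, i32 -4097>
}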
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 55601e6327e98..0a99d3980c9fb 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -119,20 +119,20 @@ cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration(
cl::init(false));
static cl::opt<bool>
-EnableOptimizeLogicalImm("aarch64-enable-logical-imm", cl::Hidden,
- cl::desc("Enable AArch64 logical imm instruction "
- "optimization"),
- cl::init(true));
+ EnableOptimizeLogicalImm("aarch64-enable-logical-imm", cl::Hidden,
+ cl::desc("Enable AArch64 logical imm instruction "
+ "optimization"),
+ cl::init(true));
// Temporary option added for the purpose of testing functionality added
// to DAGCombiner.cpp in D92230. It is expected that this can be removed
// in future when both implementations will be based off MGATHER rather
// than the GLD1 nodes added for the SVE gather load intrinsics.
static cl::opt<bool>
-EnableCombineMGatherIntrinsics("aarch64-enable-mgather-combine", cl::Hidden,
- cl::desc("Combine extends of AArch64 masked "
- "gather intrinsics"),
- cl::init(true));
+ EnableCombineMGatherIntrinsics("aarch64-enable-mgather-combine", cl::Hidden,
+ cl::desc("Combine extends of AArch64 masked "
+ "gather intrinsics"),
+ cl::init(true));
static cl::opt<bool> EnableExtToTBL("aarch64-enable-ext-to-tbl", cl::Hidden,
cl::desc("Combine ext and trunc to TBL"),
@@ -840,53 +840,53 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
// promote v4f16 to v4f32 when that is known to be safe.
auto V4Narrow = MVT::getVectorVT(ScalarVT, 4);
- setOperationPromotedToType(ISD::FADD, V4Narrow, MVT::v4f32);
- setOperationPromotedToType(ISD::FSUB, V4Narrow, MVT::v4f32);
- setOperationPromotedToType(ISD::FMUL, V4Narrow, MVT::v4f32);
- setOperationPromotedToType(ISD::FDIV, V4Narrow, MVT::v4f32);
- setOperationPromotedToType(ISD::FCEIL, V4Narrow, MVT::v4f32);
- setOperationPromotedToType(ISD::FFLOOR, V4Narrow, MVT::v4f32);
- setOperationPromotedToType(ISD::FROUND, V4Narrow, MVT::v4f32);
- setOperationPromotedToType(ISD::FTRUNC, V4Narrow, MVT::v4f32);
+ setOperationPromotedToType(ISD::FADD, V4Narrow, MVT::v4f32);
+ setOperationPromotedToType(ISD::FSUB, V4Narrow, MVT::v4f32);
+ setOperationPromotedToType(ISD::FMUL, V4Narrow, MVT::v4f32);
+ setOperationPromotedToType(ISD::FDIV, V4Narrow, MVT::v4f32);
+ setOperationPromotedToType(ISD::FCEIL, V4Narrow, MVT::v4f32);
+ setOperationPromotedToType(ISD::FFLOOR, V4Narrow, MVT::v4f32);
+ setOperationPromotedToType(ISD::FROUND, V4Narrow, MVT::v4f32);
+ setOperationPromotedToType(ISD::FTRUNC, V4Narrow, MVT::v4f32);
setOperationPromotedToType(ISD::FROUNDEVEN, V4Narrow, MVT::v4f32);
- setOperationPromotedToType(ISD::FRINT, V4Narrow, MVT::v4f32);
+ setOperationPromotedToType(ISD::FRINT, V4Narrow, MVT::v4f32);
setOperationPromotedToType(ISD::FNEARBYINT, V4Narrow, MVT::v4f32);
setOperationPromotedToType(ISD::FCANONICALIZE, V4Narrow, MVT::v4f32);
- setOperationPromotedToType(ISD::SETCC, V4Narrow, MVT::v4f32);
+ setOperationPromotedToType(ISD::SETCC, V4Narrow, MVT::v4f32);
- setOperationAction(ISD::FABS, V4Narrow, Legal);
- setOperationAction(ISD::FNEG, V4Narrow, Legal);
- setOperationAction(ISD::FMA, V4Narrow, Expand);
- setOperationAction(ISD::BR_CC, V4Narrow, Expand);
- setOperationAction(ISD::SELECT, V4Narrow, Expand);
- setOperationAction(ISD::SELECT_CC, V4Narrow, Expand);
- setOperationAction(ISD::FCOPYSIGN, V4Narrow, Custom);
- setOperationAction(ISD::FSQRT, V4Narrow, Expand);
+ setOperationAction(ISD::FABS, V4Narrow, Legal);
+ setOperationAction(ISD::FNEG, V4Narrow, Legal);
+ setOperationAction(ISD::FMA, V4Narrow, Expand);
+ setOperationAction(ISD::BR_CC, V4Narrow, Expand);
+ setOperationAction(ISD::SELECT, V4Narrow, Expand);
+ setOperationAction(ISD::SELECT_CC, V4Narrow, Expand);
+ setOperationAction(ISD::FCOPYSIGN, V4Narrow, Custom);
+ setOperationAction(ISD::FSQRT, V4Narrow, Expand);
auto V8Narrow = MVT::getVectorVT(ScalarVT, 8);
setOperationPromotedToType(ISD::FCANONICALIZE, V8Narrow, MVT::v8f32);
- setOperationPromotedToType(ISD::SETCC, V8Narrow, MVT::v8f32);
-
- setOperationAction(ISD::FABS, V8Narrow, Legal);
- setOperationAction(ISD::FADD, V8Narrow, Legal);
- setOperationAction(ISD::FCEIL, V8Narrow, Legal);
- setOperationAction(ISD::FCOPYSIGN, V8Narrow, Custom);
- setOperationAction(ISD::FDIV, V8Narrow, Legal);
- setOperationAction(ISD::FFLOOR, V8Narrow, Legal);
- setOperationAction(ISD::FMA, V8Narrow, Expand);
- setOperationAction(ISD::FMUL, V8Narrow, Legal);
- setOperationAction(ISD::FNEARBYINT, V8Narrow, Legal);
- setOperationAction(ISD::FNEG, V8Narrow, Legal);
- setOperationAction(ISD::FROUND, V8Narrow, Legal);
- setOperationAction(ISD::FROUNDEVEN, V8Narrow, Legal);
- setOperationAction(ISD::FRINT, V8Narrow, Legal);
- setOperationAction(ISD::FSQRT, V8Narrow, Expand);
- setOperationAction(ISD::FSUB, V8Narrow, Legal);
- setOperationAction(ISD::FTRUNC, V8Narrow, Legal);
- setOperationAction(ISD::BR_CC, V8Narrow, Expand);
- setOperationAction(ISD::SELECT, V8Narrow, Expand);
- setOperationAction(ISD::SELECT_CC, V8Narrow, Expand);
- setOperationAction(ISD::FP_EXTEND, V8Narrow, Expand);
+ setOperationPromotedToType(ISD::SETCC, V8Narrow, MVT::v8f32);
+
+ setOperationAction(ISD::FABS, V8Narrow, Legal);
+ setOperationAction(ISD::FADD, V8Narrow, Legal);
+ setOperationAction(ISD::FCEIL, V8Narrow, Legal);
+ setOperationAction(ISD::FCOPYSIGN, V8Narrow, Custom);
+ setOperationAction(ISD::FDIV, V8Narrow, Legal);
+ setOperationAction(ISD::FFLOOR, V8Narrow, Legal);
+ setOperationAction(ISD::FMA, V8Narrow, Expand);
+ setOperationAction(ISD::FMUL, V8Narrow, Legal);
+ setOperationAction(ISD::FNEARBYINT, V8Narrow, Legal);
+ setOperationAction(ISD::FNEG, V8Narrow, Legal);
+ setOperationAction(ISD::FROUND, V8Narrow, Legal);
+ setOperationAction(ISD::FROUNDEVEN, V8Narrow, Legal);
+ setOperationAction(ISD::FRINT, V8Narrow, Legal);
+ setOperationAction(ISD::FSQRT, V8Narrow, Expand);
+ setOperationAction(ISD::FSUB, V8Narrow, Legal);
+ setOperationAction(ISD::FTRUNC, V8Narrow, Legal);
+ setOperationAction(ISD::BR_CC, V8Narrow, Expand);
+ setOperationAction(ISD::SELECT, V8Narrow, Expand);
+ setOperationAction(ISD::SELECT_CC, V8Narrow, Expand);
+ setOperationAction(ISD::FP_EXTEND, V8Narrow, Expand);
};
if (!Subtarget->hasFullFP16()) {
@@ -1298,8 +1298,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i16, MVT::v8i32);
}
- setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
- setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
+ setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
+ setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
setOperationAction(ISD::BITREVERSE, MVT::v8i8, Legal);
setOperationAction(ISD::BITREVERSE, MVT::v16i8, Legal);
setOperationAction(ISD::BITREVERSE, MVT::v2i32, Custom);
@@ -1322,8 +1322,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::MUL, MVT::v1i64, Custom);
// Saturates
- for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v1i64,
- MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
+ for (MVT VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v1i64, MVT::v16i8,
+ MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
setOperationAction(ISD::SADDSAT, VT, Legal);
setOperationAction(ISD::UADDSAT, VT, Legal);
setOperationAction(ISD::SSUBSAT, VT, Legal);
@@ -1341,8 +1341,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
}
// Vector reductions
- for (MVT VT : { MVT::v4f16, MVT::v2f32,
- MVT::v8f16, MVT::v4f32, MVT::v2f64 }) {
+ for (MVT VT :
+ {MVT::v4f16, MVT::v2f32, MVT::v8f16, MVT::v4f32, MVT::v2f64}) {
if (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()) {
setOperationAction(ISD::VECREDUCE_FMAX, VT, Legal);
setOperationAction(ISD::VECREDUCE_FMIN, VT, Legal);
@@ -1355,8 +1355,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
if (Subtarget->hasFullFP16())
setOperationAction(ISD::VECREDUCE_FADD, MVT::v2f16, Custom);
- for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
- MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
+ for (MVT VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v16i8, MVT::v8i16,
+ MVT::v4i32}) {
setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
@@ -1432,18 +1432,18 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BITCAST, MVT::v2i16, Custom);
setOperationAction(ISD::BITCAST, MVT::v4i8, Custom);
- setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, MVT::v4i8, Custom);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, MVT::v4i8, Custom);
setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, MVT::v4i8, Custom);
setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, MVT::v4i8, Custom);
- setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i8, Custom);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i8, Custom);
setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i8, Custom);
setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i8, Custom);
// ADDP custom lowering
- for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
+ for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64})
setOperationAction(ISD::ADD, VT, Custom);
// FADDP custom lowering
- for (MVT VT : { MVT::v16f16, MVT::v8f32, MVT::v4f64 })
+ for (MVT VT : {MVT::v16f16, MVT::v8f32, MVT::v4f64})
setOperationAction(ISD::FADD, VT, Custom);
if (Subtarget->hasDotProd()) {
@@ -1601,8 +1601,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BITCAST, VT, Custom);
for (auto VT :
- { MVT::nxv2i8, MVT::nxv2i16, MVT::nxv2i32, MVT::nxv2i64, MVT::nxv4i8,
- MVT::nxv4i16, MVT::nxv4i32, MVT::nxv8i8, MVT::nxv8i16 })
+ {MVT::nxv2i8, MVT::nxv2i16, MVT::nxv2i32, MVT::nxv2i64, MVT::nxv4i8,
+ MVT::nxv4i16, MVT::nxv4i32, MVT::nxv8i8, MVT::nxv8i16})
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Legal);
for (auto VT :
@@ -1629,10 +1629,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
}
// NEON doesn't support masked loads/stores, but SME and SVE do.
- for (auto VT :
- {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v1f64,
- MVT::v2f64, MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
- MVT::v2i32, MVT::v4i32, MVT::v1i64, MVT::v2i64}) {
+ for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v1f64,
+ MVT::v2f64, MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
+ MVT::v2i32, MVT::v4i32, MVT::v1i64, MVT::v2i64}) {
setOperationAction(ISD::MLOAD, VT, Custom);
setOperationAction(ISD::MSTORE, VT, Custom);
}
@@ -1858,8 +1857,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VECREDUCE_UMIN, MVT::v2i64, Custom);
// Int operations with no NEON support.
- for (auto VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
- MVT::v2i32, MVT::v4i32, MVT::v2i64}) {
+ for (auto VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
+ MVT::v4i32, MVT::v2i64}) {
setOperationAction(ISD::BITREVERSE, VT, Custom);
setOperationAction(ISD::CTTZ, VT, Custom);
setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
@@ -2075,8 +2074,7 @@ void AArch64TargetLowering::addTypeForNEON(MVT VT) {
// F[MIN|MAX][NUM|NAN] and simple strict operations are available for all FP
// NEON types.
- if (VT.isFloatingPoint() &&
- VT.getVectorElementType() != MVT::bf16 &&
+ if (VT.isFloatingPoint() && VT.getVectorElementType() != MVT::bf16 &&
(VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()))
for (unsigned Opcode :
{ISD::FMINIMUM, ISD::FMAXIMUM, ISD::FMINNUM, ISD::FMAXNUM,
@@ -2484,8 +2482,8 @@ static bool optimizeLogicalImm(SDValue Op, unsigned Size, uint64_t Imm,
if (NewImm == 0 || NewImm == OrigMask) {
New = TLO.DAG.getNode(Op.getOpcode(), DL, VT, Op.getOperand(0),
TLO.DAG.getConstant(NewImm, DL, VT));
- // Otherwise, create a machine node so that target independent DAG combine
- // doesn't undo this optimization.
+ // Otherwise, create a machine node so that target independent DAG combine
+ // doesn't undo this optimization.
} else {
Enc = AArch64_AM::encodeLogicalImmediate(NewImm, Size);
SDValue EncConst = TLO.DAG.getTargetConstant(Enc, DL, VT);
@@ -2600,6 +2598,36 @@ void AArch64TargetLowering::computeKnownBitsForTargetNode(
APInt(Known.getBitWidth(), Op->getConstantOperandVal(0)));
break;
}
+ case AArch64ISD::MOVIedit: {
+ if (Op.getNumOperands() < 2 || !isa<ConstantSDNode>(Op.getOperand(0)) ||
+ !isa<ConstantSDNode>(Op.getOperand(1))) {
+ break; // Or assert, or return
+ }
+ uint64_t Val = Op->getConstantOperandVal(0);
+ uint64_t Mask = Op->getConstantOperandVal(1);
+ Known = KnownBits::makeConstant(APInt(Known.getBitWidth(), Val | Mask));
+ break;
+ }
+ case AArch64ISD::MOVImsl: {
+ uint64_t Val = Op->getConstantOperandVal(0);
+ uint64_t Shift = Op->getConstantOperandVal(1);
+ Known = KnownBits::makeConstant(
+ APInt(Known.getBitWidth(), Val * (1ULL << Shift)));
+ break;
+ }
+ case AArch64ISD::MVNIshift: {
+ uint64_t Val = Op->getConstantOperandVal(0);
+ uint64_t Shift = Op->getConstantOperandVal(1);
+ Known = KnownBits::makeConstant(~APInt(Known.getBitWidth(), Val << Shift));
+ break;
+ }
+ case AArch64ISD::MVNImsl: {
+ uint64_t Val = Op->getConstantOperandVal(0);
+ uint64_t Shift = Op->getConstantOperandVal(1);
+ Known = KnownBits::makeConstant(
+ ~APInt(Known.getBitWidth(), Val * (1ULL << Shift)));
+ break;
+ }
case AArch64ISD::LOADgot:
case AArch64ISD::ADDlow: {
if (!Subtarget->isTargetILP32())
@@ -2617,7 +2645,8 @@ void AArch64TargetLowering::computeKnownBitsForTargetNode(
Intrinsic::ID IntID =
static_cast<Intrinsic::ID>(Op->getConstantOperandVal(1));
switch (IntID) {
- default: return;
+ default:
+ return;
case Intrinsic::aarch64_ldaxr:
case Intrinsic::aarch64_ldxr: {
unsigned BitWidth = Known.getBitWidth();
@@ -2639,7 +2668,7 @@ void AArch64TargetLowering::computeKnownBitsForTargetNode(
MVT VT = Op.getOperand(1).getValueType().getSimpleVT();
unsigned BitWidth = Known.getBitWidth();
if (VT == MVT::v8i8 || VT == MVT::v16i8) {
- unsigned Bound = (VT == MVT::v8i8) ? 11 : 12;
+ unsigned Bound = (VT == MVT::v8i8) ? 11 : 12;
assert(BitWidth >= Bound && "Unexpected width!");
APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - Bound);
Known.Zero |= Mask;
@@ -2826,8 +2855,9 @@ AArch64TargetLowering::EmitF128CSEL(MachineInstr &MI,
return EndBB;
}
-MachineBasicBlock *AArch64TargetLowering::EmitLoweredCatchRet(
- MachineInstr &MI, MachineBasicBlock *BB) const {
+MachineBasicBlock *
+AArch64TargetLowering::EmitLoweredCatchRet(MachineInstr &MI,
+ MachineBasicBlock *BB) const {
assert(!isAsynchronousEHPersonality(classifyEHPersonality(
BB->getParent()->getFunction().getPersonalityFn())) &&
"SEH does not use catchret!");
@@ -3620,11 +3650,11 @@ static bool canEmitConjunction(const SDValue Val, bool &CanNegate,
SDValue O1 = Val->getOperand(1);
bool CanNegateL;
bool MustBeFirstL;
- if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, Depth+1))
+ if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, Depth + 1))
return false;
bool CanNegateR;
bool MustBeFirstR;
- if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, Depth+1))
+ if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, Depth + 1))
return false;
if (MustBeFirstL && MustBeFirstR)
@@ -3661,8 +3691,8 @@ static bool canEmitConjunction(const SDValue Val, bool &CanNegate,
/// \p Negate is true if we want this sub-tree being negated just by changing
/// SETCC conditions.
static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val,
- AArch64CC::CondCode &OutCC, bool Negate, SDValue CCOp,
- AArch64CC::CondCode Predicate) {
+ AArch64CC::CondCode &OutCC, bool Negate,
+ SDValue CCOp, AArch64CC::CondCode Predicate) {
// We're at a tree leaf, produce a conditional comparison operation.
unsigned Opcode = Val->getOpcode();
if (Opcode == ISD::SETCC) {
@@ -4049,10 +4079,9 @@ getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
} else {
SDValue UpperBits = DAG.getNode(ISD::MULHU, DL, MVT::i64, LHS, RHS);
SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
- Overflow =
- DAG.getNode(AArch64ISD::SUBS, DL, VTs,
- DAG.getConstant(0, DL, MVT::i64),
- UpperBits).getValue(1);
+ Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs,
+ DAG.getConstant(0, DL, MVT::i64), UpperBits)
+ .getValue(1);
}
break;
}
@@ -4265,10 +4294,10 @@ static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) {
}
// built the mask value encoding the expected behavior.
- unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
- (!IsData << 3) | // IsDataCache bit
- (Locality << 1) | // Cache level bits
- (unsigned)IsStream; // Stream bit
+ unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
+ (!IsData << 3) | // IsDataCache bit
+ (Locality << 1) | // Cache level bits
+ (unsigned)IsStream; // Stream bit
return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Op.getOperand(0),
DAG.getTargetConstant(PrfOp, DL, MVT::i32),
Op.getOperand(1));
@@ -4702,7 +4731,8 @@ AArch64TargetLowering::LowerVectorFP_TO_INT_SAT(SDValue Op,
SDValue MinC = DAG.getConstant(
APInt::getSignedMaxValue(SatWidth).sext(SrcElementWidth), DL, IntVT);
SDValue Min = DAG.getNode(ISD::SMIN, DL, IntVT, NativeCvt, MinC);
- SDValue Min2 = SrcVal2 ? DAG.getNode(ISD::SMIN, DL, IntVT, NativeCvt2, MinC) : SDValue();
+ SDValue Min2 = SrcVal2 ? DAG.getNode(ISD::SMIN, DL, IntVT, NativeCvt2, MinC)
+ : SDValue();
SDValue MaxC = DAG.getConstant(
APInt::getSignedMinValue(SatWidth).sext(SrcElementWidth), DL, IntVT);
Sat = DAG.getNode(ISD::SMAX, DL, IntVT, Min, MaxC);
@@ -4711,7 +4741,8 @@ AArch64TargetLowering::LowerVectorFP_TO_INT_SAT(SDValue Op,
SDValue MinC = DAG.getConstant(
APInt::getAllOnes(SatWidth).zext(SrcElementWidth), DL, IntVT);
Sat = DAG.getNode(ISD::UMIN, DL, IntVT, NativeCvt, MinC);
- Sat2 = SrcVal2 ? DAG.getNode(ISD::UMIN, DL, IntVT, NativeCvt2, MinC) : SDValue();
+ Sat2 = SrcVal2 ? DAG.getNode(ISD::UMIN, DL, IntVT, NativeCvt2, MinC)
+ : SDValue();
}
if (SrcVal2)
@@ -4771,8 +4802,8 @@ SDValue AArch64TargetLowering::LowerFP_TO_INT_SAT(SDValue Op,
APInt::getSignedMinValue(SatWidth).sext(DstWidth), DL, DstVT);
Sat = DAG.getNode(ISD::SMAX, DL, DstVT, Min, MaxC);
} else {
- SDValue MinC = DAG.getConstant(
- APInt::getAllOnes(SatWidth).zext(DstWidth), DL, DstVT);
+ SDValue MinC =
+ DAG.getConstant(APInt::getAllOnes(SatWidth).zext(DstWidth), DL, DstVT);
Sat = DAG.getNode(ISD::UMIN, DL, DstVT, NativeCvt, MinC);
}
@@ -4919,7 +4950,7 @@ SDValue AArch64TargetLowering::LowerVectorINT_TO_FP(SDValue Op,
}
SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op,
- SelectionDAG &DAG) const {
+ SelectionDAG &DAG) const {
if (Op.getValueType().isVector())
return LowerVectorINT_TO_FP(Op, DAG);
@@ -5077,8 +5108,8 @@ SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op,
Entry.IsZExt = false;
Args.push_back(Entry);
- RTLIB::Libcall LC = ArgVT == MVT::f64 ? RTLIB::SINCOS_STRET_F64
- : RTLIB::SINCOS_STRET_F32;
+ RTLIB::Libcall LC =
+ ArgVT == MVT::f64 ? RTLIB::SINCOS_STRET_F64 : RTLIB::SINCOS_STRET_F32;
const char *LibcallName = getLibcallName(LC);
SDValue Callee =
DAG.getExternalSymbol(LibcallName, getPointerTy(DAG.getDataLayout()));
@@ -5221,8 +5252,8 @@ static bool isAddSubSExt(SDValue N, SelectionDAG &DAG) {
if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
SDValue N0 = N.getOperand(0);
SDValue N1 = N.getOperand(1);
- return N0->hasOneUse() && N1->hasOneUse() &&
- isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
+ return N0->hasOneUse() && N1->hasOneUse() && isSignExtended(N0, DAG) &&
+ isSignExtended(N1, DAG);
}
return false;
}
@@ -5232,8 +5263,8 @@ static bool isAddSubZExt(SDValue N, SelectionDAG &DAG) {
if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
SDValue N0 = N.getOperand(0);
SDValue N1 = N.getOperand(1);
- return N0->hasOneUse() && N1->hasOneUse() &&
- isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
+ return N0->hasOneUse() && N1->hasOneUse() && isZeroExtended(N0, DAG) &&
+ isZeroExtended(N1, DAG);
}
return false;
}
@@ -5613,8 +5644,8 @@ SDValue AArch64TargetLowering::getRuntimePStateSM(SelectionDAG &DAG,
TargetLowering::CallLoweringInfo CLI(DAG);
ArgListTy Args;
CLI.setDebugLoc(DL).setChain(Chain).setLibCallee(
- CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2,
- RetTy, Callee, std::move(Args));
+ CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2, RetTy,
+ Callee, std::move(Args));
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
SDValue Mask = DAG.getConstant(/*PSTATE.SM*/ 1, DL, MVT::i64);
return DAG.getNode(ISD::AND, DL, MVT::i64, CallResult.first.getOperand(0),
@@ -5844,12 +5875,14 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
}
}
-SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
- SelectionDAG &DAG) const {
+SDValue
+AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
+ SelectionDAG &DAG) const {
unsigned IntNo = Op.getConstantOperandVal(0);
SDLoc DL(Op);
switch (IntNo) {
- default: return SDValue(); // Don't custom lower most intrinsics.
+ default:
+ return SDValue(); // Don't custom lower most intrinsics.
case Intrinsic::thread_pointer: {
EVT PtrVT = getPointerTy(DAG.getDataLayout());
return DAG.getNode(AArch64ISD::THREAD_POINTER, DL, PtrVT);
@@ -6678,9 +6711,8 @@ SDValue AArch64TargetLowering::LowerMLOAD(SDValue Op, SelectionDAG &DAG) const {
}
// Custom lower trunc store for v4i8 vectors, since it is promoted to v4i16.
-static SDValue LowerTruncateVectorStore(SDLoc DL, StoreSDNode *ST,
- EVT VT, EVT MemVT,
- SelectionDAG &DAG) {
+static SDValue LowerTruncateVectorStore(SDLoc DL, StoreSDNode *ST, EVT VT,
+ EVT MemVT, SelectionDAG &DAG) {
assert(VT.isVector() && "VT should be a vector type");
assert(MemVT == MVT::v4i8 && VT == MVT::v4i16);
@@ -6694,19 +6726,19 @@ static SDValue LowerTruncateVectorStore(SDLoc DL, StoreSDNode *ST,
// str s0, [x0]
SDValue Undef = DAG.getUNDEF(MVT::i16);
- SDValue UndefVec = DAG.getBuildVector(MVT::v4i16, DL,
- {Undef, Undef, Undef, Undef});
+ SDValue UndefVec =
+ DAG.getBuildVector(MVT::v4i16, DL, {Undef, Undef, Undef, Undef});
- SDValue TruncExt = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i16,
- Value, UndefVec);
+ SDValue TruncExt =
+ DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i16, Value, UndefVec);
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::v8i8, TruncExt);
Trunc = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32, Trunc);
SDValue ExtractTrunc = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
Trunc, DAG.getConstant(0, DL, MVT::i64));
- return DAG.getStore(ST->getChain(), DL, ExtractTrunc,
- ST->getBasePtr(), ST->getMemOperand());
+ return DAG.getStore(ST->getChain(), DL, ExtractTrunc, ST->getBasePtr(),
+ ST->getMemOperand());
}
static SDValue LowerADDRSPACECAST(SDValue Op, SelectionDAG &DAG) {
@@ -6744,11 +6776,10 @@ static SDValue LowerADDRSPACECAST(SDValue Op, SelectionDAG &DAG) {
// Custom lowering for any store, vector or scalar and/or default or with
// a truncate operations. Currently only custom lower truncate operation
// from vector v4i16 to v4i8 or volatile stores of i128.
-SDValue AArch64TargetLowering::LowerSTORE(SDValue Op,
- SelectionDAG &DAG) const {
+SDValue AArch64TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
SDLoc Dl(Op);
StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
- assert (StoreNode && "Can only custom lower store nodes");
+ assert(StoreNode && "Can only custom lower store nodes");
SDValue Value = StoreNode->getValue();
@@ -6808,8 +6839,8 @@ SDValue AArch64TargetLowering::LowerSTORE(SDValue Op,
SDValue Base = StoreNode->getBasePtr();
EVT PtrVT = Base.getValueType();
for (unsigned i = 0; i < 8; i++) {
- SDValue Part = DAG.getNode(AArch64ISD::LS64_EXTRACT, Dl, MVT::i64,
- Value, DAG.getConstant(i, Dl, MVT::i32));
+ SDValue Part = DAG.getNode(AArch64ISD::LS64_EXTRACT, Dl, MVT::i64, Value,
+ DAG.getConstant(i, Dl, MVT::i32));
SDValue Ptr = DAG.getNode(ISD::ADD, Dl, PtrVT, Base,
DAG.getConstant(i * 8, Dl, PtrVT));
Chain = DAG.getStore(Chain, Dl, Part, Ptr, StoreNode->getPointerInfo(),
@@ -6853,8 +6884,7 @@ SDValue AArch64TargetLowering::LowerStore128(SDValue Op,
return Result;
}
-SDValue AArch64TargetLowering::LowerLOAD(SDValue Op,
- SelectionDAG &DAG) const {
+SDValue AArch64TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
assert(LoadNode && "Expected custom lowering of a load node");
@@ -7019,9 +7049,8 @@ SDValue AArch64TargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
Op.getOperand(0));
// Generate SUBS & CSEL.
- SDValue Cmp =
- DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, MVT::i32),
- Op.getOperand(0), DAG.getConstant(0, DL, VT));
+ SDValue Cmp = DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, MVT::i32),
+ Op.getOperand(0), DAG.getConstant(0, DL, VT));
return DAG.getNode(AArch64ISD::CSEL, DL, VT, Op.getOperand(0), Neg,
DAG.getConstant(AArch64CC::PL, DL, MVT::i32),
Cmp.getValue(1));
@@ -7742,7 +7771,8 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
SmallVector<ISD::OutputArg, 4> Outs;
GetReturnInfo(CallConv, F.getReturnType(), F.getAttributes(), Outs,
DAG.getTargetLoweringInfo(), MF.getDataLayout());
- if (any_of(Outs, [](ISD::OutputArg &Out){ return Out.VT.isScalableVector(); }))
+ if (any_of(Outs,
+ [](ISD::OutputArg &Out) { return Out.VT.isScalableVector(); }))
FuncInfo->setIsSVECC(true);
// Assign locations to all of the incoming arguments.
@@ -7803,10 +7833,10 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
int Size = Ins[i].Flags.getByValSize();
unsigned NumRegs = (Size + 7) / 8;
- // FIXME: This works on big-endian for composite byvals, which are the common
- // case. It should also work for fundamental types too.
+ // FIXME: This works on big-endian for composite byvals, which are the
+ // common case. It should also work for fundamental types too.
unsigned FrameIdx =
- MFI.CreateFixedObject(8 * NumRegs, VA.getLocMemOffset(), false);
+ MFI.CreateFixedObject(8 * NumRegs, VA.getLocMemOffset(), false);
SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrVT);
InVals.push_back(FrameIdxN);
@@ -7904,7 +7934,8 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
unsigned ArgOffset = VA.getLocMemOffset();
unsigned ArgSize = (VA.getLocInfo() == CCValAssign::Indirect
? VA.getLocVT().getSizeInBits()
- : VA.getValVT().getSizeInBits()) / 8;
+ : VA.getValVT().getSizeInBits()) /
+ 8;
uint32_t BEAlign = 0;
if (!Subtarget->isLittleEndian() && ArgSize < 8 &&
@@ -7967,8 +7998,7 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
}
if (VA.getLocInfo() == CCValAssign::Indirect) {
- assert((VA.getValVT().isScalableVT() ||
- Subtarget->isWindowsArm64EC()) &&
+ assert((VA.getValVT().isScalableVT() || Subtarget->isWindowsArm64EC()) &&
"Indirect arguments should be scalable on most subtargets");
uint64_t PartSize = VA.getValVT().getStoreSize().getKnownMinValue();
@@ -8044,12 +8074,11 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
// Ensure that the SMSTART happens after the CopyWithChain such that its
// chain result is used.
- for (unsigned I=0; I<InVals.size(); ++I) {
+ for (unsigned I = 0; I < InVals.size(); ++I) {
Register Reg = MF.getRegInfo().createVirtualRegister(
getRegClassFor(InVals[I].getValueType().getSimpleVT()));
Chain = DAG.getCopyToReg(Chain, DL, Reg, InVals[I]);
- InVals[I] = DAG.getCopyFromReg(Chain, DL, Reg,
- InVals[I].getValueType());
+ InVals[I] = DAG.getCopyFromReg(Chain, DL, Reg, InVals[I].getValueType());
}
}
@@ -8081,7 +8110,7 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
RegParmTypes.push_back(MVT::f128);
// Compute the set of forwarded registers. The rest are scratch.
SmallVectorImpl<ForwardedRegister> &Forwards =
- FuncInfo->getForwardedMustTailRegParms();
+ FuncInfo->getForwardedMustTailRegParms();
CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes,
CC_AArch64_AAPCS);
@@ -8234,7 +8263,8 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
GPRIdx = MFI.CreateFixedObject(GPRSaveSize, -(int)GPRSaveSize, false);
if (GPRSaveSize & 15)
// The extra size here, if triggered, will always be 8.
- MFI.CreateFixedObject(16 - (GPRSaveSize & 15), -(int)alignTo(GPRSaveSize, 16), false);
+ MFI.CreateFixedObject(16 - (GPRSaveSize & 15),
+ -(int)alignTo(GPRSaveSize, 16), false);
} else
GPRIdx = MFI.CreateStackObject(GPRSaveSize, Align(8), false);
@@ -8437,9 +8467,9 @@ static void analyzeCallOperands(const AArch64TargetLowering &TLI,
if (!UseVarArgCC) {
// Get type of the original argument.
- EVT ActualVT =
- TLI.getValueType(DAG.getDataLayout(), CLI.Args[Outs[i].OrigArgIndex].Ty,
- /*AllowUnknown*/ true);
+ EVT ActualVT = TLI.getValueType(DAG.getDataLayout(),
+ CLI.Args[Outs[i].OrigArgIndex].Ty,
+ /*AllowUnknown*/ true);
MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : ArgVT;
// If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
@@ -8536,7 +8566,8 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
}
}
- if (canGuaranteeTCO(CalleeCC, getTargetMachine().Options.GuaranteedTailCallOpt))
+ if (canGuaranteeTCO(CalleeCC,
+ getTargetMachine().Options.GuaranteedTailCallOpt))
return CCMatch;
// Externally-defined functions with weak linkage should not be
@@ -8592,10 +8623,11 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
analyzeCallOperands(*this, Subtarget, CLI, CCInfo);
if (IsVarArg && !(CLI.CB && CLI.CB->isMustTailCall())) {
- // When we are musttail, additional checks have been done and we can safely ignore this check
- // At least two cases here: if caller is fastcc then we can't have any
- // memory arguments (we'd be expected to clean up the stack afterwards). If
- // caller is C then we could potentially use its argument area.
+ // When we are musttail, additional checks have been done and we can safely
+ // ignore this check At least two cases here: if caller is fastcc then we
+ // can't have any memory arguments (we'd be expected to clean up the stack
+ // afterwards). If caller is C then we could potentially use its argument
+ // area.
// FIXME: for now we take the most conservative of these in both cases:
// disallow all variadic memory operands.
@@ -9000,10 +9032,10 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
TPIDR2ObjAddr);
OptimizationRemarkEmitter ORE(&MF.getFunction());
ORE.emit([&]() {
- auto R = CLI.CB ? OptimizationRemarkAnalysis("sme", "SMELazySaveZA",
- CLI.CB)
- : OptimizationRemarkAnalysis("sme", "SMELazySaveZA",
- &MF.getFunction());
+ auto R = CLI.CB
+ ? OptimizationRemarkAnalysis("sme", "SMELazySaveZA", CLI.CB)
+ : OptimizationRemarkAnalysis("sme", "SMELazySaveZA",
+ &MF.getFunction());
return DescribeCallsite(R) << " sets up a lazy save for ZA";
});
} else if (RequiresSaveAllZA) {
@@ -9024,10 +9056,10 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
PStateSM = getRuntimePStateSM(DAG, Chain, DL, MVT::i64);
OptimizationRemarkEmitter ORE(&MF.getFunction());
ORE.emit([&]() {
- auto R = CLI.CB ? OptimizationRemarkAnalysis("sme", "SMETransition",
- CLI.CB)
- : OptimizationRemarkAnalysis("sme", "SMETransition",
- &MF.getFunction());
+ auto R = CLI.CB
+ ? OptimizationRemarkAnalysis("sme", "SMETransition", CLI.CB)
+ : OptimizationRemarkAnalysis("sme", "SMETransition",
+ &MF.getFunction());
DescribeCallsite(R) << " requires a streaming mode transition";
return R;
});
@@ -9077,7 +9109,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
const auto &Forwards = FuncInfo->getForwardedMustTailRegParms();
for (const auto &F : Forwards) {
SDValue Val = DAG.getCopyFromReg(Chain, DL, F.VReg, F.VT);
- RegsToPass.emplace_back(F.PReg, Val);
+ RegsToPass.emplace_back(F.PReg, Val);
}
}
@@ -9346,8 +9378,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
// Build a sequence of copy-to-reg nodes chained together with token chain
// and flag operands which copy the outgoing args into the appropriate regs.
for (auto &RegToPass : RegsToPass) {
- Chain = DAG.getCopyToReg(Chain, DL, RegToPass.first,
- RegToPass.second, InGlue);
+ Chain =
+ DAG.getCopyToReg(Chain, DL, RegToPass.first, RegToPass.second, InGlue);
InGlue = Chain.getValue(1);
}
@@ -9450,8 +9482,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
// Add argument registers to the end of the list so that they are known live
// into the call.
for (auto &RegToPass : RegsToPass)
- Ops.push_back(DAG.getRegister(RegToPass.first,
- RegToPass.second.getValueType()));
+ Ops.push_back(
+ DAG.getRegister(RegToPass.first, RegToPass.second.getValueType()));
// Add a register mask operand representing the call-preserved registers.
const uint32_t *Mask;
@@ -9591,8 +9623,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
Register Reg = MF.getRegInfo().createVirtualRegister(
getRegClassFor(InVals[I].getValueType().getSimpleVT()));
SDValue X = DAG.getCopyToReg(Result, DL, Reg, InVals[I]);
- InVals[I] = DAG.getCopyFromReg(X, DL, Reg,
- InVals[I].getValueType());
+ InVals[I] = DAG.getCopyFromReg(X, DL, Reg, InVals[I].getValueType());
}
}
@@ -9733,7 +9764,7 @@ AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
Glue = Chain.getValue(1);
RetOps.push_back(
- DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
+ DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
}
const MCPhysReg *I = TRI->getCalleeSavedRegsViaCopy(&MF);
@@ -9796,7 +9827,7 @@ SDValue AArch64TargetLowering::getTargetNode(ConstantPoolSDNode *N, EVT Ty,
N->getOffset(), Flag);
}
-SDValue AArch64TargetLowering::getTargetNode(BlockAddressSDNode* N, EVT Ty,
+SDValue AArch64TargetLowering::getTargetNode(BlockAddressSDNode *N, EVT Ty,
SelectionDAG &DAG,
unsigned Flag) const {
return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, 0, Flag);
@@ -10030,8 +10061,7 @@ SDValue AArch64TargetLowering::LowerELFTLSLocalExec(const GlobalValue *GV,
HiVar,
DAG.getTargetConstant(0, DL, MVT::i32)),
0);
- return SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, Addr,
- LoVar,
+ return SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, Addr, LoVar,
DAG.getTargetConstant(0, DL, MVT::i32)),
0);
}
@@ -10243,8 +10273,8 @@ AArch64TargetLowering::LowerWindowsGlobalTLSAddress(SDValue Op,
// The pointer to the thread's TLS data area is at the TLS Index scaled by 8
// offset into the TLSArray.
TLSIndex = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TLSIndex);
- SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
- DAG.getConstant(3, DL, PtrVT));
+ SDValue Slot =
+ DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex, DAG.getConstant(3, DL, PtrVT));
SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
MachinePointerInfo());
@@ -11271,8 +11301,8 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(
// (SELECT_CC setgt, lhs, 0, lhs, 0) -> (BIC lhs, (SRA lhs, typesize-1))
// (SELECT_CC setlt, lhs, 0, lhs, 0) -> (AND lhs, (SRA lhs, typesize-1))
// Both require less instructions than compare and conditional select.
- if ((CC == ISD::SETGT || CC == ISD::SETLT) && LHS == TVal &&
- RHSC && RHSC->isZero() && CFVal && CFVal->isZero() &&
+ if ((CC == ISD::SETGT || CC == ISD::SETLT) && LHS == TVal && RHSC &&
+ RHSC->isZero() && CFVal && CFVal->isZero() &&
LHS.getValueType() == RHS.getValueType()) {
EVT VT = LHS.getValueType();
SDValue Shift =
@@ -11386,7 +11416,7 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(
else if (CFVal && CFVal == RHSVal && AArch64CC == AArch64CC::NE)
FVal = LHS;
} else if (Opcode == AArch64ISD::CSNEG && RHSVal && RHSVal->isOne()) {
- assert (CTVal && CFVal && "Expected constant operands for CSNEG.");
+ assert(CTVal && CFVal && "Expected constant operands for CSNEG.");
// Use a CSINV to transform "a == C ? 1 : -1" to "a == C ? a : -1" to
// avoid materializing C.
AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
@@ -11533,8 +11563,7 @@ SDValue AArch64TargetLowering::LowerSELECT(SDValue Op,
if (Ty == MVT::aarch64svcount) {
TVal = DAG.getNode(ISD::BITCAST, DL, MVT::nxv16i1, TVal);
FVal = DAG.getNode(ISD::BITCAST, DL, MVT::nxv16i1, FVal);
- SDValue Sel =
- DAG.getNode(ISD::SELECT, DL, MVT::nxv16i1, CCVal, TVal, FVal);
+ SDValue Sel = DAG.getNode(ISD::SELECT, DL, MVT::nxv16i1, CCVal, TVal, FVal);
return DAG.getNode(ISD::BITCAST, DL, Ty, Sel);
}
@@ -11618,8 +11647,7 @@ SDValue AArch64TargetLowering::LowerJumpTable(SDValue Op,
return getAddr(JT, DAG);
}
-SDValue AArch64TargetLowering::LowerBR_JT(SDValue Op,
- SelectionDAG &DAG) const {
+SDValue AArch64TargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
// Jump table entries as PC relative offsets. No additional tweaking
// is necessary here. Just get the address of the jump table.
SDLoc DL(Op);
@@ -11706,7 +11734,7 @@ SDValue AArch64TargetLowering::LowerConstantPool(SDValue Op,
}
SDValue AArch64TargetLowering::LowerBlockAddress(SDValue Op,
- SelectionDAG &DAG) const {
+ SelectionDAG &DAG) const {
BlockAddressSDNode *BAN = cast<BlockAddressSDNode>(Op);
const BlockAddress *BA = BAN->getBlockAddress();
@@ -11741,7 +11769,7 @@ SDValue AArch64TargetLowering::LowerBlockAddress(SDValue Op,
}
SDValue AArch64TargetLowering::LowerDarwin_VASTART(SDValue Op,
- SelectionDAG &DAG) const {
+ SelectionDAG &DAG) const {
AArch64FunctionInfo *FuncInfo =
DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
@@ -11884,9 +11912,9 @@ SDValue AArch64TargetLowering::LowerVACOPY(SDValue Op,
SDLoc DL(Op);
unsigned PtrSize = Subtarget->isTargetILP32() ? 4 : 8;
unsigned VaListSize =
- (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
- ? PtrSize
- : Subtarget->isTargetILP32() ? 20 : 32;
+ (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows()) ? PtrSize
+ : Subtarget->isTargetILP32() ? 20
+ : 32;
const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
@@ -11960,7 +11988,7 @@ SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
SDValue NarrowFP =
DAG.getNode(ISD::FP_ROUND, DL, VT, WideFP.getValue(0),
DAG.getIntPtrConstant(1, DL, /*isTarget=*/true));
- SDValue Ops[] = { NarrowFP, WideFP.getValue(1) };
+ SDValue Ops[] = {NarrowFP, WideFP.getValue(1)};
// Merge the rounded value with the chain output of the load.
return DAG.getMergeValues(Ops, DL);
}
@@ -12003,8 +12031,9 @@ SDValue AArch64TargetLowering::LowerSPONENTRY(SDValue Op,
// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
-Register AArch64TargetLowering::
-getRegisterByName(const char* RegName, LLT VT, const MachineFunction &MF) const {
+Register
+AArch64TargetLowering::getRegisterByName(const char *RegName, LLT VT,
+ const MachineFunction &MF) const {
Register Reg = MatchRegisterName(RegName);
if (AArch64::X1 <= Reg && Reg <= AArch64::X28) {
const AArch64RegisterInfo *MRI = Subtarget->getRegisterInfo();
@@ -12126,7 +12155,8 @@ bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
}
LLVM_DEBUG(dbgs() << (IsLegal ? "Legal " : "Illegal ") << VT
- << " imm value: "; Imm.dump(););
+ << " imm value: ";
+ Imm.dump(););
return IsLegal;
}
@@ -12196,8 +12226,8 @@ SDValue AArch64TargetLowering::getSqrtEstimate(SDValue Operand,
// Newton reciprocal square root iteration: E * 0.5 * (3 - X * E^2)
// AArch64 reciprocal square root iteration instruction: 0.5 * (3 - M * N)
for (int i = ExtraSteps; i > 0; --i) {
- SDValue Step = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Estimate,
- Flags);
+ SDValue Step =
+ DAG.getNode(ISD::FMUL, DL, VT, Estimate, Estimate, Flags);
Step = DAG.getNode(AArch64ISD::FRSQRTS, DL, VT, Operand, Step, Flags);
Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, Flags);
}
@@ -12225,8 +12255,8 @@ SDValue AArch64TargetLowering::getRecipEstimate(SDValue Operand,
// Newton reciprocal iteration: E * (2 - X * E)
// AArch64 reciprocal iteration instruction: (2 - M * N)
for (int i = ExtraSteps; i > 0; --i) {
- SDValue Step = DAG.getNode(AArch64ISD::FRECPS, DL, VT, Operand,
- Estimate, Flags);
+ SDValue Step =
+ DAG.getNode(AArch64ISD::FRECPS, DL, VT, Operand, Estimate, Flags);
Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, Flags);
}
@@ -12278,9 +12308,8 @@ const char *AArch64TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
if (ConstraintVT.isFloatingPoint())
return "w";
- if (ConstraintVT.isVector() &&
- (ConstraintVT.getSizeInBits() == 64 ||
- ConstraintVT.getSizeInBits() == 128))
+ if (ConstraintVT.isVector() && (ConstraintVT.getSizeInBits() == 64 ||
+ ConstraintVT.getSizeInBits() == 128))
return "w";
return "r";
@@ -12928,10 +12957,10 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
int WindowScale;
ShuffleSourceInfo(SDValue Vec)
- : Vec(Vec), MinElt(std::numeric_limits<unsigned>::max()), MaxElt(0),
+ : Vec(Vec), MinElt(std::numeric_limits<unsigned>::max()), MaxElt(0),
ShuffleVec(Vec), WindowBase(0), WindowScale(1) {}
- bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
+ bool operator==(SDValue OtherVec) { return Vec == OtherVec; }
};
// First gather all vectors used as an immediate source for this BUILD_VECTOR
@@ -13109,8 +13138,9 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
if (!SrcVT.is64BitVector()) {
LLVM_DEBUG(
- dbgs() << "Reshuffle failed: don't know how to lower AArch64ISD::EXT "
- "for SVE vectors.");
+ dbgs()
+ << "Reshuffle failed: don't know how to lower AArch64ISD::EXT "
+ "for SVE vectors.");
return SDValue();
}
@@ -13161,8 +13191,8 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
// trunc. So only std::min(SrcBits, DestBits) actually get defined in this
// segment.
EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
- int BitsDefined = std::min(OrigEltTy.getScalarSizeInBits(),
- VT.getScalarSizeInBits());
+ int BitsDefined =
+ std::min(OrigEltTy.getScalarSizeInBits(), VT.getScalarSizeInBits());
int LanesDefined = BitsDefined / BitsPerShuffleLane;
// This source is expected to fill ResMultiplier lanes of the final shuffle,
@@ -13181,7 +13211,7 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
return SDValue();
}
- SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) };
+ SDValue ShuffleOps[] = {DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT)};
for (unsigned i = 0; i < Sources.size(); ++i)
ShuffleOps[i] = Sources[i].ShuffleVec;
@@ -13234,7 +13264,8 @@ static bool isSingletonEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
// Detect patterns of a0,a1,a2,a3,b0,b1,b2,b3,c0,c1,c2,c3,d0,d1,d2,d3 from
// v4i32s. This is really a truncate, which we can construct out of (legal)
// concats and truncate nodes.
-static SDValue ReconstructTruncateFromBuildVector(SDValue V, SelectionDAG &DAG) {
+static SDValue ReconstructTruncateFromBuildVector(SDValue V,
+ SelectionDAG &DAG) {
if (V.getValueType() != MVT::v16i8)
return SDValue();
assert(V.getNumOperands() == 16 && "Expected 16 operands on the BUILDVECTOR");
@@ -13452,8 +13483,8 @@ static bool isTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
return true;
}
-static bool isINSMask(ArrayRef<int> M, int NumInputElements,
- bool &DstIsLeft, int &Anomaly) {
+static bool isINSMask(ArrayRef<int> M, int NumInputElements, bool &DstIsLeft,
+ int &Anomaly) {
if (M.size() != static_cast<size_t>(NumInputElements))
return false;
@@ -13541,9 +13572,9 @@ static SDValue tryFormConcatFromShuffle(SDValue Op, SelectionDAG &DAG) {
/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
/// the specified operations to build the shuffle. ID is the perfect-shuffle
-//ID, V1 and V2 are the original shuffle inputs. PFEntry is the Perfect shuffle
-//table entry and LHS/RHS are the immediate inputs for this stage of the
-//shuffle.
+// ID, V1 and V2 are the original shuffle inputs. PFEntry is the Perfect shuffle
+// table entry and LHS/RHS are the immediate inputs for this stage of the
+// shuffle.
static SDValue GeneratePerfectShuffle(unsigned ID, SDValue V1, SDValue V2,
unsigned PFEntry, SDValue LHS,
SDValue RHS, SelectionDAG &DAG,
@@ -14011,9 +14042,9 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
for (unsigned LaneSize : {64U, 32U, 16U}) {
unsigned Lane = 0;
if (isWideDUPMask(ShuffleMask, VT, LaneSize, Lane)) {
- unsigned Opcode = LaneSize == 64 ? AArch64ISD::DUPLANE64
- : LaneSize == 32 ? AArch64ISD::DUPLANE32
- : AArch64ISD::DUPLANE16;
+ unsigned Opcode = LaneSize == 64 ? AArch64ISD::DUPLANE64
+ : LaneSize == 32 ? AArch64ISD::DUPLANE32
+ : AArch64ISD::DUPLANE16;
// Cast V1 to an integer vector with required lane size
MVT NewEltTy = MVT::getIntegerVT(LaneSize);
unsigned NewEltCount = VT.getSizeInBits() / LaneSize;
@@ -14230,7 +14261,6 @@ SDValue AArch64TargetLowering::LowerDUPQLane(SDValue Op,
return DAG.getNode(ISD::BITCAST, DL, VT, TBL);
}
-
static bool resolveBuildVector(BuildVectorSDNode *BVN, APInt &CnstBits,
APInt &UndefBits) {
EVT VT = BVN->getValueType(0);
@@ -14255,7 +14285,7 @@ static bool resolveBuildVector(BuildVectorSDNode *BVN, APInt &CnstBits,
// Try 64-bit splatted SIMD immediate.
static SDValue tryAdvSIMDModImm64(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
- const APInt &Bits) {
+ const APInt &Bits) {
if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
EVT VT = Op.getValueType();
@@ -14292,16 +14322,13 @@ static SDValue tryAdvSIMDModImm32(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType1(Value))) {
Value = AArch64_AM::encodeAdvSIMDModImmType1(Value);
Shift = 0;
- }
- else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType2(Value))) {
+ } else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType2(Value))) {
Value = AArch64_AM::encodeAdvSIMDModImmType2(Value);
Shift = 8;
- }
- else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType3(Value))) {
+ } else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType3(Value))) {
Value = AArch64_AM::encodeAdvSIMDModImmType3(Value);
Shift = 16;
- }
- else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType4(Value))) {
+ } else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType4(Value))) {
Value = AArch64_AM::encodeAdvSIMDModImmType4(Value);
Shift = 24;
}
@@ -14345,8 +14372,7 @@ static SDValue tryAdvSIMDModImm16(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType5(Value))) {
Value = AArch64_AM::encodeAdvSIMDModImmType5(Value);
Shift = 0;
- }
- else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType6(Value))) {
+ } else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType6(Value))) {
Value = AArch64_AM::encodeAdvSIMDModImmType6(Value);
Shift = 8;
}
@@ -14385,8 +14411,7 @@ static SDValue tryAdvSIMDModImm321s(unsigned NewOp, SDValue Op,
if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType7(Value))) {
Value = AArch64_AM::encodeAdvSIMDModImmType7(Value);
Shift = 264;
- }
- else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType8(Value))) {
+ } else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType8(Value))) {
Value = AArch64_AM::encodeAdvSIMDModImmType8(Value);
Shift = 272;
}
@@ -14437,9 +14462,8 @@ static SDValue tryAdvSIMDModImmFP(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType11(Value))) {
Value = AArch64_AM::encodeAdvSIMDModImmType11(Value);
MovTy = isWide ? MVT::v4f32 : MVT::v2f32;
- }
- else if (isWide &&
- (isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType12(Value))) {
+ } else if (isWide &&
+ (isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType12(Value))) {
Value = AArch64_AM::encodeAdvSIMDModImmType12(Value);
MovTy = MVT::v2f64;
}
@@ -14653,16 +14677,15 @@ SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op,
if (resolveBuildVector(BVN, DefBits, UndefBits)) {
SDValue NewOp;
- if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::ORRi, Op, DAG,
- DefBits, &LHS)) ||
- (NewOp = tryAdvSIMDModImm16(AArch64ISD::ORRi, Op, DAG,
- DefBits, &LHS)))
+ if ((NewOp =
+ tryAdvSIMDModImm32(AArch64ISD::ORRi, Op, DAG, DefBits, &LHS)) ||
+ (NewOp = tryAdvSIMDModImm16(AArch64ISD::ORRi, Op, DAG, DefBits, &LHS)))
return NewOp;
- if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::ORRi, Op, DAG,
- UndefBits, &LHS)) ||
- (NewOp = tryAdvSIMDModImm16(AArch64ISD::ORRi, Op, DAG,
- UndefBits, &LHS)))
+ if ((NewOp =
+ tryAdvSIMDModImm32(AArch64ISD::ORRi, Op, DAG, UndefBits, &LHS)) ||
+ (NewOp =
+ tryAdvSIMDModImm16(AArch64ISD::ORRi, Op, DAG, UndefBits, &LHS)))
return NewOp;
}
@@ -14672,12 +14695,11 @@ SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op,
// Normalize the operands of BUILD_VECTOR. The value of constant operands will
// be truncated to fit element width.
-static SDValue NormalizeBuildVector(SDValue Op,
- SelectionDAG &DAG) {
+static SDValue NormalizeBuildVector(SDValue Op, SelectionDAG &DAG) {
assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
SDLoc DL(Op);
EVT VT = Op.getValueType();
- EVT EltTy= VT.getVectorElementType();
+ EVT EltTy = VT.getVectorElementType();
if (EltTy.isFloatingPoint() || EltTy.getSizeInBits() > 16)
return Op;
@@ -15052,8 +15074,9 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
if (VT.getVectorElementType().isFloatingPoint()) {
SmallVector<SDValue, 8> Ops;
EVT EltTy = VT.getVectorElementType();
- assert ((EltTy == MVT::f16 || EltTy == MVT::bf16 || EltTy == MVT::f32 ||
- EltTy == MVT::f64) && "Unsupported floating-point vector type");
+ assert((EltTy == MVT::f16 || EltTy == MVT::bf16 || EltTy == MVT::f32 ||
+ EltTy == MVT::f64) &&
+ "Unsupported floating-point vector type");
LLVM_DEBUG(
dbgs() << "LowerBUILD_VECTOR: float constant splats, creating int "
"BITCASTS, and try again\n");
@@ -15685,8 +15708,9 @@ static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
APInt SplatBits, SplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
- if (!BVN || !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
- HasAnyUndefs, ElementBits) ||
+ if (!BVN ||
+ !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
+ ElementBits) ||
SplatBitSize > ElementBits)
return false;
Cnt = SplatBits.getSExtValue();
@@ -15804,10 +15828,10 @@ SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
if (isVShiftLImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize)
return DAG.getNode(AArch64ISD::VSHL, DL, VT, Op.getOperand(0),
DAG.getConstant(Cnt, DL, MVT::i32));
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
- DAG.getConstant(Intrinsic::aarch64_neon_ushl, DL,
- MVT::i32),
- Op.getOperand(0), Op.getOperand(1));
+ return DAG.getNode(
+ ISD::INTRINSIC_WO_CHAIN, DL, VT,
+ DAG.getConstant(Intrinsic::aarch64_neon_ushl, DL, MVT::i32),
+ Op.getOperand(0), Op.getOperand(1));
case ISD::SRA:
case ISD::SRL:
if (VT.isScalableVector() &&
@@ -15844,10 +15868,9 @@ SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
// negate the shift amount
SDValue NegShift = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
Op.getOperand(1));
- SDValue NegShiftLeft =
- DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
- DAG.getConstant(Opc, DL, MVT::i32), Op.getOperand(0),
- NegShift);
+ SDValue NegShiftLeft = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
+ DAG.getConstant(Opc, DL, MVT::i32),
+ Op.getOperand(0), NegShift);
return NegShiftLeft;
}
@@ -16181,8 +16204,8 @@ AArch64TargetLowering::LowerWindowsDYNAMIC_STACKALLOC(SDValue Op,
Chain = DAG.getCALLSEQ_START(Chain, 0, 0, DL);
EVT PtrVT = getPointerTy(DAG.getDataLayout());
- SDValue Callee = DAG.getTargetExternalSymbol(Subtarget->getChkStkName(),
- PtrVT, 0);
+ SDValue Callee =
+ DAG.getTargetExternalSymbol(Subtarget->getChkStkName(), PtrVT, 0);
const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
const uint32_t *Mask = TRI->getWindowsStackProbePreservedMask();
@@ -16509,7 +16532,7 @@ bool AArch64TargetLowering::shouldReduceLoadWidth(
// The shift can be combined if it matches the size of the value being
// loaded (and so reducing the width would make it not match).
uint64_t ShiftAmount = Base.getOperand(1).getConstantOperandVal(1);
- uint64_t LoadBytes = Mem->getMemoryVT().getSizeInBits()/8;
+ uint64_t LoadBytes = Mem->getMemoryVT().getSizeInBits() / 8;
if (ShiftAmount == Log2_32(LoadBytes))
return false;
}
@@ -16570,10 +16593,10 @@ bool AArch64TargetLowering::isProfitableToHoist(Instruction *I) const {
const DataLayout &DL = F->getDataLayout();
Type *Ty = User->getOperand(0)->getType();
- return !(isFMAFasterThanFMulAndFAdd(*F, Ty) &&
- isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
- (Options.AllowFPOpFusion == FPOpFusion::Fast ||
- Options.UnsafeFPMath));
+ return !(
+ isFMAFasterThanFMulAndFAdd(*F, Ty) &&
+ isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
+ (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));
}
// All 32-bit GPR operations implicitly zero the high-half of the corresponding
@@ -16632,7 +16655,7 @@ bool AArch64TargetLowering::isExtFreeImpl(const Instruction *Ext) const {
case Instruction::GetElementPtr: {
gep_type_iterator GTI = gep_type_begin(Instr);
auto &DL = Ext->getDataLayout();
- std::advance(GTI, U.getOperandNo()-1);
+ std::advance(GTI, U.getOperandNo() - 1);
Type *IdxTy = GTI.getIndexedType();
// This extension will end up with a shift because of the scaling factor.
// 8-bit sized types have a scaling factor of 1, thus a shift amount of 0.
@@ -17752,8 +17775,9 @@ bool AArch64TargetLowering::isLegalICmpImmediate(int64_t Immed) const {
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
bool AArch64TargetLowering::isLegalAddressingMode(const DataLayout &DL,
- const AddrMode &AMode, Type *Ty,
- unsigned AS, Instruction *I) const {
+ const AddrMode &AMode,
+ Type *Ty, unsigned AS,
+ Instruction *I) const {
// AArch64 has five basic addressing modes:
// reg
// reg + 9-bit signed offset
@@ -17890,9 +17914,8 @@ AArch64TargetLowering::getScratchRegisters(CallingConv::ID) const {
// LR is a callee-save register, but we must treat it as clobbered by any call
// site. Hence we include LR in the scratch registers, which are in turn added
// as implicit-defs for stackmaps and patchpoints.
- static const MCPhysReg ScratchRegs[] = {
- AArch64::X16, AArch64::X17, AArch64::LR, 0
- };
+ static const MCPhysReg ScratchRegs[] = {AArch64::X16, AArch64::X17,
+ AArch64::LR, 0};
return ScratchRegs;
}
@@ -17901,9 +17924,8 @@ ArrayRef<MCPhysReg> AArch64TargetLowering::getRoundingControlRegisters() const {
return RCRegs;
}
-bool
-AArch64TargetLowering::isDesirableToCommuteWithShift(const SDNode *N,
- CombineLevel Level) const {
+bool AArch64TargetLowering::isDesirableToCommuteWithShift(
+ const SDNode *N, CombineLevel Level) const {
assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
N->getOpcode() == ISD::SRL) &&
"Expected shift op");
@@ -18579,7 +18601,7 @@ AArch64TargetLowering::BuildSREMPow2(SDNode *N, const APInt &Divisor,
}
static std::optional<unsigned> IsSVECntIntrinsic(SDValue S) {
- switch(getIntrinsicID(S.getNode())) {
+ switch (getIntrinsicID(S.getNode())) {
default:
break;
case Intrinsic::aarch64_sve_cntb:
@@ -18875,11 +18897,10 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
// Allow the scaling to be folded into the `cnt` instruction by preventing
// the scaling to be obscured here. This makes it easier to pattern match.
- if (IsSVECntIntrinsic(N0) ||
- (N0->getOpcode() == ISD::TRUNCATE &&
- (IsSVECntIntrinsic(N0->getOperand(0)))))
- if (ConstValue.sge(1) && ConstValue.sle(16))
- return SDValue();
+ if (IsSVECntIntrinsic(N0) || (N0->getOpcode() == ISD::TRUNCATE &&
+ (IsSVECntIntrinsic(N0->getOperand(0)))))
+ if (ConstValue.sge(1) && ConstValue.sle(16))
+ return SDValue();
// Multiplication of a power of two plus/minus one can be done more
// cheaply as shift+add/sub. For now, this is true unilaterally. If
@@ -18897,8 +18918,7 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
if (TrailingZeroes) {
// Conservatively do not lower to shift+add+shift if the mul might be
// folded into smul or umul.
- if (N0->hasOneUse() && (isSignExtended(N0, DAG) ||
- isZeroExtended(N0, DAG)))
+ if (N0->hasOneUse() && (isSignExtended(N0, DAG) || isZeroExtended(N0, DAG)))
return SDValue();
// Conservatively do not lower to shift+add+shift if the mul might be
// folded into madd or msub.
@@ -19201,8 +19221,8 @@ static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG,
// to use the new Chain.
DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), Load.getValue(1));
- unsigned Opcode =
- (N->getOpcode() == ISD::SINT_TO_FP) ? AArch64ISD::SITOF : AArch64ISD::UITOF;
+ unsigned Opcode = (N->getOpcode() == ISD::SINT_TO_FP) ? AArch64ISD::SITOF
+ : AArch64ISD::UITOF;
return DAG.getNode(Opcode, SDLoc(N), VT, Load);
}
@@ -19569,8 +19589,8 @@ static SDValue performSVEAndCombine(SDNode *N,
Dup = DAG.getNode(ISD::SPLAT_VECTOR, DL, UnpkOp->getValueType(0),
DAG.getConstant(Mask.zextOrTrunc(32), DL, MVT::i32));
- SDValue And = DAG.getNode(ISD::AND, DL,
- UnpkOp->getValueType(0), UnpkOp, Dup);
+ SDValue And =
+ DAG.getNode(ISD::AND, DL, UnpkOp->getValueType(0), UnpkOp, Dup);
return DAG.getNode(Opc, DL, N->getValueType(0), And);
}
@@ -19680,7 +19700,7 @@ static SDValue performANDCombine(SDNode *N,
if (SDValue R = performANDORCSELCombine(N, DAG))
return R;
- if (SDValue R = performANDSETCCCombine(N,DCI))
+ if (SDValue R = performANDSETCCCombine(N, DCI))
return R;
if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
@@ -19955,8 +19975,7 @@ performExtractVectorEltCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
// with the strict_fadd, but we also need uses of the chain output of the
// original strict_fadd to use the chain output of the new strict_fadd as
// otherwise it may not be deleted.
- SDValue Ret = DAG.getNode(N0->getOpcode(), DL,
- {VT, MVT::Other},
+ SDValue Ret = DAG.getNode(N0->getOpcode(), DL, {VT, MVT::Other},
{N0->getOperand(0), Extract1, Extract2});
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Ret);
DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Ret.getValue(1));
@@ -20489,11 +20508,11 @@ static bool isSetCC(SDValue Op, SetCCInfoAndKind &SetCCInfo) {
}
// Returns true if Op is setcc or zext of setcc.
-static bool isSetCCOrZExtSetCC(const SDValue& Op, SetCCInfoAndKind &Info) {
+static bool isSetCCOrZExtSetCC(const SDValue &Op, SetCCInfoAndKind &Info) {
if (isSetCC(Op, Info))
return true;
return ((Op.getOpcode() == ISD::ZERO_EXTEND) &&
- isSetCC(Op->getOperand(0), Info));
+ isSetCC(Op->getOperand(0), Info));
}
// The folding we want to perform is:
@@ -20931,9 +20950,9 @@ static SDValue performBuildVectorCombine(SDNode *N,
SDValue HalfToSingle =
DAG.getNode(ISD::FP_EXTEND, DL, MVT::v4f32, SrcVec);
SDValue SubvectorIdx = Elt0->getOperand(0)->getOperand(1);
- SDValue Extract = DAG.getNode(
- ISD::EXTRACT_SUBVECTOR, DL, VT.changeVectorElementType(MVT::f32),
- HalfToSingle, SubvectorIdx);
+ SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
+ VT.changeVectorElementType(MVT::f32),
+ HalfToSingle, SubvectorIdx);
return DAG.getNode(ISD::FP_EXTEND, DL, VT, Extract);
}
}
@@ -20970,7 +20989,8 @@ static SDValue performBuildVectorCombine(SDNode *N,
if (!DAG.getTargetLoweringInfo().isTypeLegal(ExtVT))
return SDValue();
- SDValue SubvectorIdx = DAG.getVectorIdxConstant(Elt0->getConstantOperandVal(1), DL);
+ SDValue SubvectorIdx =
+ DAG.getVectorIdxConstant(Elt0->getConstantOperandVal(1), DL);
SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, DL, ExtVT, VecToExtend);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i32, Ext,
@@ -21263,8 +21283,10 @@ static bool isLoadOrMultipleLoads(SDValue B, SmallVector<LoadSDNode *> &Loads) {
// are lowered. Note that this only comes up because we do not always visit
// operands before uses. After that is fixed this can be removed and in the
// meantime this is fairly specific to the lowering we expect from IR.
- // t46: v16i8 = vector_shuffle<0,1,2,3,4,5,6,7,8,9,10,11,16,17,18,19> t44, t45
- // t44: v16i8 = vector_shuffle<0,1,2,3,4,5,6,7,16,17,18,19,u,u,u,u> t42, t43
+ // t46: v16i8 = vector_shuffle<0,1,2,3,4,5,6,7,8,9,10,11,16,17,18,19> t44,
+ // t45
+ // t44: v16i8 = vector_shuffle<0,1,2,3,4,5,6,7,16,17,18,19,u,u,u,u> t42,
+ // t43
// t42: v16i8 = concat_vectors t40, t36, undef:v4i8, undef:v4i8
// t40: v4i8,ch = load<(load (s32) from %ir.17)> t0, t22, undef:i64
// t36: v4i8,ch = load<(load (s32) from %ir.13)> t0, t18, undef:i64
@@ -21765,7 +21787,7 @@ static SDValue tryConvertSVEWideCompare(SDNode *N, ISD::CondCode CC,
case Intrinsic::aarch64_sve_cmphs_wide:
case Intrinsic::aarch64_sve_cmphi_wide:
case Intrinsic::aarch64_sve_cmplo_wide:
- case Intrinsic::aarch64_sve_cmpls_wide: {
+ case Intrinsic::aarch64_sve_cmpls_wide: {
if (auto *CN = dyn_cast<ConstantSDNode>(Comparator.getOperand(0))) {
uint64_t ImmVal = CN->getZExtValue();
if (ImmVal <= 127)
@@ -22632,9 +22654,8 @@ static SDValue splitStoreSplat(SelectionDAG &DAG, StoreSDNode &St,
uint64_t BaseOffset = 0;
const MachinePointerInfo &PtrInfo = St.getPointerInfo();
- SDValue NewST1 =
- DAG.getStore(St.getChain(), DL, SplatVal, BasePtr, PtrInfo,
- OrigAlignment, St.getMemOperand()->getFlags());
+ SDValue NewST1 = DAG.getStore(St.getChain(), DL, SplatVal, BasePtr, PtrInfo,
+ OrigAlignment, St.getMemOperand()->getFlags());
// As this in ISel, we will not merge this add which may degrade results.
if (BasePtr->getOpcode() == ISD::ADD &&
@@ -22699,10 +22720,10 @@ static SDValue performLD1Combine(SDNode *N, SelectionDAG &DAG, unsigned Opc) {
ContainerVT = getSVEContainerType(ContainerVT);
SDVTList VTs = DAG.getVTList(ContainerVT, MVT::Other);
- SDValue Ops[] = { N->getOperand(0), // Chain
- N->getOperand(2), // Pg
- N->getOperand(3), // Base
- DAG.getValueType(VT) };
+ SDValue Ops[] = {N->getOperand(0), // Chain
+ N->getOperand(2), // Pg
+ N->getOperand(3), // Base
+ DAG.getValueType(VT)};
SDValue Load = DAG.getNode(Opc, DL, VTs, Ops);
SDValue LoadChain = SDValue(Load.getNode(), 1);
@@ -22710,7 +22731,7 @@ static SDValue performLD1Combine(SDNode *N, SelectionDAG &DAG, unsigned Opc) {
if (ContainerVT.isInteger() && (VT != ContainerVT))
Load = DAG.getNode(ISD::TRUNCATE, DL, VT, Load.getValue(0));
- return DAG.getMergeValues({ Load, LoadChain }, DL);
+ return DAG.getMergeValues({Load, LoadChain}, DL);
}
static SDValue performLDNT1Combine(SDNode *N, SelectionDAG &DAG) {
@@ -22724,16 +22745,16 @@ static SDValue performLDNT1Combine(SDNode *N, SelectionDAG &DAG) {
auto *MINode = cast<MemIntrinsicSDNode>(N);
SDValue PassThru = DAG.getConstant(0, DL, LoadVT);
- SDValue L = DAG.getMaskedLoad(LoadVT, DL, MINode->getChain(),
- MINode->getOperand(3), DAG.getUNDEF(PtrTy),
- MINode->getOperand(2), PassThru,
- MINode->getMemoryVT(), MINode->getMemOperand(),
- ISD::UNINDEXED, ISD::NON_EXTLOAD, false);
+ SDValue L =
+ DAG.getMaskedLoad(LoadVT, DL, MINode->getChain(), MINode->getOperand(3),
+ DAG.getUNDEF(PtrTy), MINode->getOperand(2), PassThru,
+ MINode->getMemoryVT(), MINode->getMemOperand(),
+ ISD::UNINDEXED, ISD::NON_EXTLOAD, false);
- if (VT.isFloatingPoint()) {
- SDValue Ops[] = { DAG.getNode(ISD::BITCAST, DL, VT, L), L.getValue(1) };
- return DAG.getMergeValues(Ops, DL);
- }
+ if (VT.isFloatingPoint()) {
+ SDValue Ops[] = {DAG.getNode(ISD::BITCAST, DL, VT, L), L.getValue(1)};
+ return DAG.getMergeValues(Ops, DL);
+ }
return L;
}
@@ -22776,12 +22797,11 @@ static SDValue performST1Combine(SDNode *N, SelectionDAG &DAG) {
else
SrcNew = DAG.getNode(ISD::ANY_EXTEND, DL, HwSrcVt, Data);
- SDValue Ops[] = { N->getOperand(0), // Chain
- SrcNew,
- N->getOperand(4), // Base
- N->getOperand(3), // Pg
- InputVT
- };
+ SDValue Ops[] = {N->getOperand(0), // Chain
+ SrcNew,
+ N->getOperand(4), // Base
+ N->getOperand(3), // Pg
+ InputVT};
return DAG.getNode(AArch64ISD::ST1_PRED, DL, N->getValueType(0), Ops);
}
@@ -22933,7 +22953,7 @@ static SDValue replaceSplatVectorStore(SelectionDAG &DAG, StoreSDNode &St) {
}
// Check that all vector element locations were inserted to.
if (IndexNotInserted.any())
- return SDValue();
+ return SDValue();
return splitStoreSplat(DAG, St, SplatVal, NumVecElts);
}
@@ -23537,19 +23557,19 @@ static SDValue performPostLD1Combine(SDNode *N,
continue;
SmallVector<SDValue, 8> Ops;
- Ops.push_back(LD->getOperand(0)); // Chain
+ Ops.push_back(LD->getOperand(0)); // Chain
if (IsLaneOp) {
- Ops.push_back(Vector); // The vector to be inserted
- Ops.push_back(Lane); // The lane to be inserted in the vector
+ Ops.push_back(Vector); // The vector to be inserted
+ Ops.push_back(Lane); // The lane to be inserted in the vector
}
Ops.push_back(Addr);
Ops.push_back(Inc);
- EVT Tys[3] = { VT, MVT::i64, MVT::Other };
+ EVT Tys[3] = {VT, MVT::i64, MVT::Other};
SDVTList SDTys = DAG.getVTList(Tys);
- unsigned NewOp = IsLaneOp ? AArch64ISD::LD1LANEpost : AArch64ISD::LD1DUPpost;
- SDValue UpdN = DAG.getMemIntrinsicNode(NewOp, SDLoc(N), SDTys, Ops,
- MemVT,
+ unsigned NewOp =
+ IsLaneOp ? AArch64ISD::LD1LANEpost : AArch64ISD::LD1DUPpost;
+ SDValue UpdN = DAG.getMemIntrinsicNode(NewOp, SDLoc(N), SDTys, Ops, MemVT,
LoadSDN->getMemOperand());
// Update the uses.
@@ -23558,8 +23578,8 @@ static SDValue performPostLD1Combine(SDNode *N,
SDValue(UpdN.getNode(), 2) // Chain
};
DCI.CombineTo(LD, NewResults);
- DCI.CombineTo(N, SDValue(UpdN.getNode(), 0)); // Dup/Inserted Result
- DCI.CombineTo(User, SDValue(UpdN.getNode(), 1)); // Write back register
+ DCI.CombineTo(N, SDValue(UpdN.getNode(), 0)); // Dup/Inserted Result
+ DCI.CombineTo(User, SDValue(UpdN.getNode(), 1)); // Write back register
break;
}
@@ -24368,49 +24388,110 @@ static SDValue performNEONPostLDSTCombine(SDNode *N,
unsigned NumVecs = 0;
unsigned IntNo = N->getConstantOperandVal(1);
switch (IntNo) {
- default: llvm_unreachable("unexpected intrinsic for Neon base update");
- case Intrinsic::aarch64_neon_ld2: NewOpc = AArch64ISD::LD2post;
- NumVecs = 2; break;
- case Intrinsic::aarch64_neon_ld3: NewOpc = AArch64ISD::LD3post;
- NumVecs = 3; break;
- case Intrinsic::aarch64_neon_ld4: NewOpc = AArch64ISD::LD4post;
- NumVecs = 4; break;
- case Intrinsic::aarch64_neon_st2: NewOpc = AArch64ISD::ST2post;
- NumVecs = 2; IsStore = true; break;
- case Intrinsic::aarch64_neon_st3: NewOpc = AArch64ISD::ST3post;
- NumVecs = 3; IsStore = true; break;
- case Intrinsic::aarch64_neon_st4: NewOpc = AArch64ISD::ST4post;
- NumVecs = 4; IsStore = true; break;
- case Intrinsic::aarch64_neon_ld1x2: NewOpc = AArch64ISD::LD1x2post;
- NumVecs = 2; break;
- case Intrinsic::aarch64_neon_ld1x3: NewOpc = AArch64ISD::LD1x3post;
- NumVecs = 3; break;
- case Intrinsic::aarch64_neon_ld1x4: NewOpc = AArch64ISD::LD1x4post;
- NumVecs = 4; break;
- case Intrinsic::aarch64_neon_st1x2: NewOpc = AArch64ISD::ST1x2post;
- NumVecs = 2; IsStore = true; break;
- case Intrinsic::aarch64_neon_st1x3: NewOpc = AArch64ISD::ST1x3post;
- NumVecs = 3; IsStore = true; break;
- case Intrinsic::aarch64_neon_st1x4: NewOpc = AArch64ISD::ST1x4post;
- NumVecs = 4; IsStore = true; break;
- case Intrinsic::aarch64_neon_ld2r: NewOpc = AArch64ISD::LD2DUPpost;
- NumVecs = 2; IsDupOp = true; break;
- case Intrinsic::aarch64_neon_ld3r: NewOpc = AArch64ISD::LD3DUPpost;
- NumVecs = 3; IsDupOp = true; break;
- case Intrinsic::aarch64_neon_ld4r: NewOpc = AArch64ISD::LD4DUPpost;
- NumVecs = 4; IsDupOp = true; break;
- case Intrinsic::aarch64_neon_ld2lane: NewOpc = AArch64ISD::LD2LANEpost;
- NumVecs = 2; IsLaneOp = true; break;
- case Intrinsic::aarch64_neon_ld3lane: NewOpc = AArch64ISD::LD3LANEpost;
- NumVecs = 3; IsLaneOp = true; break;
- case Intrinsic::aarch64_neon_ld4lane: NewOpc = AArch64ISD::LD4LANEpost;
- NumVecs = 4; IsLaneOp = true; break;
- case Intrinsic::aarch64_neon_st2lane: NewOpc = AArch64ISD::ST2LANEpost;
- NumVecs = 2; IsStore = true; IsLaneOp = true; break;
- case Intrinsic::aarch64_neon_st3lane: NewOpc = AArch64ISD::ST3LANEpost;
- NumVecs = 3; IsStore = true; IsLaneOp = true; break;
- case Intrinsic::aarch64_neon_st4lane: NewOpc = AArch64ISD::ST4LANEpost;
- NumVecs = 4; IsStore = true; IsLaneOp = true; break;
+ default:
+ llvm_unreachable("unexpected intrinsic for Neon base update");
+ case Intrinsic::aarch64_neon_ld2:
+ NewOpc = AArch64ISD::LD2post;
+ NumVecs = 2;
+ break;
+ case Intrinsic::aarch64_neon_ld3:
+ NewOpc = AArch64ISD::LD3post;
+ NumVecs = 3;
+ break;
+ case Intrinsic::aarch64_neon_ld4:
+ NewOpc = AArch64ISD::LD4post;
+ NumVecs = 4;
+ break;
+ case Intrinsic::aarch64_neon_st2:
+ NewOpc = AArch64ISD::ST2post;
+ NumVecs = 2;
+ IsStore = true;
+ break;
+ case Intrinsic::aarch64_neon_st3:
+ NewOpc = AArch64ISD::ST3post;
+ NumVecs = 3;
+ IsStore = true;
+ break;
+ case Intrinsic::aarch64_neon_st4:
+ NewOpc = AArch64ISD::ST4post;
+ NumVecs = 4;
+ IsStore = true;
+ break;
+ case Intrinsic::aarch64_neon_ld1x2:
+ NewOpc = AArch64ISD::LD1x2post;
+ NumVecs = 2;
+ break;
+ case Intrinsic::aarch64_neon_ld1x3:
+ NewOpc = AArch64ISD::LD1x3post;
+ NumVecs = 3;
+ break;
+ case Intrinsic::aarch64_neon_ld1x4:
+ NewOpc = AArch64ISD::LD1x4post;
+ NumVecs = 4;
+ break;
+ case Intrinsic::aarch64_neon_st1x2:
+ NewOpc = AArch64ISD::ST1x2post;
+ NumVecs = 2;
+ IsStore = true;
+ break;
+ case Intrinsic::aarch64_neon_st1x3:
+ NewOpc = AArch64ISD::ST1x3post;
+ NumVecs = 3;
+ IsStore = true;
+ break;
+ case Intrinsic::aarch64_neon_st1x4:
+ NewOpc = AArch64ISD::ST1x4post;
+ NumVecs = 4;
+ IsStore = true;
+ break;
+ case Intrinsic::aarch64_neon_ld2r:
+ NewOpc = AArch64ISD::LD2DUPpost;
+ NumVecs = 2;
+ IsDupOp = true;
+ break;
+ case Intrinsic::aarch64_neon_ld3r:
+ NewOpc = AArch64ISD::LD3DUPpost;
+ NumVecs = 3;
+ IsDupOp = true;
+ break;
+ case Intrinsic::aarch64_neon_ld4r:
+ NewOpc = AArch64ISD::LD4DUPpost;
+ NumVecs = 4;
+ IsDupOp = true;
+ break;
+ case Intrinsic::aarch64_neon_ld2lane:
+ NewOpc = AArch64ISD::LD2LANEpost;
+ NumVecs = 2;
+ IsLaneOp = true;
+ break;
+ case Intrinsic::aarch64_neon_ld3lane:
+ NewOpc = AArch64ISD::LD3LANEpost;
+ NumVecs = 3;
+ IsLaneOp = true;
+ break;
+ case Intrinsic::aarch64_neon_ld4lane:
+ NewOpc = AArch64ISD::LD4LANEpost;
+ NumVecs = 4;
+ IsLaneOp = true;
+ break;
+ case Intrinsic::aarch64_neon_st2lane:
+ NewOpc = AArch64ISD::ST2LANEpost;
+ NumVecs = 2;
+ IsStore = true;
+ IsLaneOp = true;
+ break;
+ case Intrinsic::aarch64_neon_st3lane:
+ NewOpc = AArch64ISD::ST3LANEpost;
+ NumVecs = 3;
+ IsStore = true;
+ IsLaneOp = true;
+ break;
+ case Intrinsic::aarch64_neon_st4lane:
+ NewOpc = AArch64ISD::ST4LANEpost;
+ NumVecs = 4;
+ IsStore = true;
+ IsLaneOp = true;
+ break;
}
EVT VecTy;
@@ -24445,14 +24526,14 @@ static SDValue performNEONPostLDSTCombine(SDNode *N,
unsigned n;
for (n = 0; n < NumResultVecs; ++n)
Tys[n] = VecTy;
- Tys[n++] = MVT::i64; // Type of write back register
- Tys[n] = MVT::Other; // Type of the chain
+ Tys[n++] = MVT::i64; // Type of write back register
+ Tys[n] = MVT::Other; // Type of the chain
SDVTList SDTys = DAG.getVTList(ArrayRef(Tys, NumResultVecs + 2));
MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N);
- SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys, Ops,
- MemInt->getMemoryVT(),
- MemInt->getMemOperand());
+ SDValue UpdN =
+ DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys, Ops,
+ MemInt->getMemoryVT(), MemInt->getMemOperand());
// Update the uses.
std::vector<SDValue> NewResults;
@@ -24470,16 +24551,16 @@ static SDValue performNEONPostLDSTCombine(SDNode *N,
// Checks to see if the value is the prescribed width and returns information
// about its extension mode.
-static
-bool checkValueWidth(SDValue V, unsigned width, ISD::LoadExtType &ExtType) {
+static bool checkValueWidth(SDValue V, unsigned width,
+ ISD::LoadExtType &ExtType) {
ExtType = ISD::NON_EXTLOAD;
- switch(V.getNode()->getOpcode()) {
+ switch (V.getNode()->getOpcode()) {
default:
return false;
case ISD::LOAD: {
LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
- if ((LoadNode->getMemoryVT() == MVT::i8 && width == 8)
- || (LoadNode->getMemoryVT() == MVT::i16 && width == 16)) {
+ if ((LoadNode->getMemoryVT() == MVT::i8 && width == 8) ||
+ (LoadNode->getMemoryVT() == MVT::i16 && width == 16)) {
ExtType = LoadNode->getExtensionType();
return true;
}
@@ -24487,8 +24568,8 @@ bool checkValueWidth(SDValue V, unsigned width, ISD::LoadExtType &ExtType) {
}
case ISD::AssertSext: {
VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
- if ((TypeNode->getVT() == MVT::i8 && width == 8)
- || (TypeNode->getVT() == MVT::i16 && width == 16)) {
+ if ((TypeNode->getVT() == MVT::i8 && width == 8) ||
+ (TypeNode->getVT() == MVT::i16 && width == 16)) {
ExtType = ISD::SEXTLOAD;
return true;
}
@@ -24496,8 +24577,8 @@ bool checkValueWidth(SDValue V, unsigned width, ISD::LoadExtType &ExtType) {
}
case ISD::AssertZext: {
VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
- if ((TypeNode->getVT() == MVT::i8 && width == 8)
- || (TypeNode->getVT() == MVT::i16 && width == 16)) {
+ if ((TypeNode->getVT() == MVT::i8 && width == 8) ||
+ (TypeNode->getVT() == MVT::i16 && width == 16)) {
ExtType = ISD::ZEXTLOAD;
return true;
}
@@ -24588,9 +24669,9 @@ static bool isEquivalentMaskless(unsigned CC, unsigned width,
// the whole range we can just adjust the input and avoid writing equations
// for sign extended inputs.
if (ExtType == ISD::SEXTLOAD)
- AddConstant -= (1 << (width-1));
+ AddConstant -= (1 << (width - 1));
- switch(CC) {
+ switch (CC) {
case AArch64CC::LE:
case AArch64CC::GT:
if ((AddConstant == 0) ||
@@ -24601,22 +24682,20 @@ static bool isEquivalentMaskless(unsigned CC, unsigned width,
break;
case AArch64CC::LT:
case AArch64CC::GE:
- if ((AddConstant == 0) ||
- (AddConstant >= 0 && CompConstant <= 0) ||
+ if ((AddConstant == 0) || (AddConstant >= 0 && CompConstant <= 0) ||
(AddConstant <= 0 && CompConstant <= 0 && CompConstant <= AddConstant))
return true;
break;
case AArch64CC::HI:
case AArch64CC::LS:
if ((AddConstant >= 0 && CompConstant < 0) ||
- (AddConstant <= 0 && CompConstant >= -1 &&
- CompConstant < AddConstant + MaxUInt))
+ (AddConstant <= 0 && CompConstant >= -1 &&
+ CompConstant < AddConstant + MaxUInt))
return true;
- break;
+ break;
case AArch64CC::PL:
case AArch64CC::MI:
- if ((AddConstant == 0) ||
- (AddConstant > 0 && CompConstant <= 0) ||
+ if ((AddConstant == 0) || (AddConstant > 0 && CompConstant <= 0) ||
(AddConstant < 0 && CompConstant <= AddConstant))
return true;
break;
@@ -24697,11 +24776,10 @@ static SDValue performSubsToAndsCombine(SDNode *N, SDNode *SubsNode,
return DAG.getNode(N->getOpcode(), N, N->getVTList(), Ops);
}
-static
-SDValue performCONDCombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI,
- SelectionDAG &DAG, unsigned CCIndex,
- unsigned CmpIndex) {
+static SDValue performCONDCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ SelectionDAG &DAG, unsigned CCIndex,
+ unsigned CmpIndex) {
unsigned CC = cast<ConstantSDNode>(N->getOperand(CCIndex))->getSExtValue();
SDNode *SubsNode = N->getOperand(CmpIndex).getNode();
unsigned CondOpcode = SubsNode->getOpcode();
@@ -24756,19 +24834,20 @@ SDValue performCONDCombine(SDNode *N,
if (!checkValueWidth(SubsInputValue, MaskBits, ExtType) ||
!checkValueWidth(AddInputValue2, MaskBits, ExtType) ||
- !checkValueWidth(AddInputValue1, MaskBits, ExtType) )
+ !checkValueWidth(AddInputValue1, MaskBits, ExtType))
return SDValue();
- if(!isEquivalentMaskless(CC, MaskBits, ExtType,
- cast<ConstantSDNode>(AddInputValue2.getNode())->getSExtValue(),
- cast<ConstantSDNode>(SubsInputValue.getNode())->getSExtValue()))
+ if (!isEquivalentMaskless(
+ CC, MaskBits, ExtType,
+ cast<ConstantSDNode>(AddInputValue2.getNode())->getSExtValue(),
+ cast<ConstantSDNode>(SubsInputValue.getNode())->getSExtValue()))
return SDValue();
// The AND is not necessary, remove it.
- SDVTList VTs = DAG.getVTList(SubsNode->getValueType(0),
- SubsNode->getValueType(1));
- SDValue Ops[] = { AddValue, SubsNode->getOperand(1) };
+ SDVTList VTs =
+ DAG.getVTList(SubsNode->getValueType(0), SubsNode->getValueType(1));
+ SDValue Ops[] = {AddValue, SubsNode->getOperand(1)};
SDValue NewValue = DAG.getNode(CondOpcode, SDLoc(SubsNode), VTs, Ops);
DAG.ReplaceAllUsesWith(SubsNode, NewValue.getNode());
@@ -25147,7 +25226,7 @@ static SDValue performCSELCombine(SDNode *N,
// CSEL 0, cttz(X), eq(X, 0) -> AND cttz bitwidth-1
// CSEL cttz(X), 0, ne(X, 0) -> AND cttz bitwidth-1
if (SDValue Folded = foldCSELofCTTZ(N, DAG))
- return Folded;
+ return Folded;
// CSEL a, b, cc, SUBS(x, y) -> CSEL a, b, swapped(cc), SUBS(y, x)
// if SUB(y, x) already exists and we can produce a swapped predicate for cc.
@@ -25303,8 +25382,8 @@ static SDValue performSETCCCombine(SDNode *N,
if (FromVT.isFixedLengthVector() &&
FromVT.getVectorElementType() == MVT::i1) {
bool IsNull = isNullConstant(RHS);
- LHS = DAG.getNode(IsNull ? ISD::VECREDUCE_OR : ISD::VECREDUCE_AND,
- DL, MVT::i1, LHS->getOperand(0));
+ LHS = DAG.getNode(IsNull ? ISD::VECREDUCE_OR : ISD::VECREDUCE_AND, DL,
+ MVT::i1, LHS->getOperand(0));
LHS = DAG.getNode(IsNull ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND, DL, ToVT,
LHS);
return DAG.getSetCC(DL, VT, LHS, RHS, Cond);
@@ -25659,8 +25738,7 @@ static SDValue performVSelectCombine(SDNode *N, SelectionDAG &DAG) {
SetCC = DAG.getSetCC(SDLoc(N), CmpVT.changeVectorElementTypeToInteger(),
N0.getOperand(0), N0.getOperand(1),
cast<CondCodeSDNode>(N0.getOperand(2))->get());
- return DAG.getNode(ISD::VSELECT, SDLoc(N), ResVT, SetCC,
- IfTrue, IfFalse);
+ return DAG.getNode(ISD::VSELECT, SDLoc(N), ResVT, SetCC, IfTrue, IfFalse);
}
/// A vector select: "(select vL, vR, (setcc LHS, RHS))" is best performed with
@@ -25717,17 +25795,15 @@ static SDValue performSelectCombine(SDNode *N,
// First perform a vector comparison, where lane 0 is the one we're interested
// in.
SDLoc DL(N0);
- SDValue LHS =
- DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, SrcVT, N0.getOperand(0));
- SDValue RHS =
- DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, SrcVT, N0.getOperand(1));
+ SDValue LHS = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, SrcVT, N0.getOperand(0));
+ SDValue RHS = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, SrcVT, N0.getOperand(1));
SDValue SetCC = DAG.getNode(ISD::SETCC, DL, CCVT, LHS, RHS, N0.getOperand(2));
// Now duplicate the comparison mask we want across all other lanes.
SmallVector<int, 8> DUPMask(CCVT.getVectorNumElements(), 0);
SDValue Mask = DAG.getVectorShuffle(CCVT, DL, SetCC, SetCC, DUPMask);
- Mask = DAG.getNode(ISD::BITCAST, DL,
- ResVT.changeVectorElementTypeToInteger(), Mask);
+ Mask = DAG.getNode(ISD::BITCAST, DL, ResVT.changeVectorElementTypeToInteger(),
+ Mask);
return DAG.getSelect(DL, ResVT, Mask, N->getOperand(1), N->getOperand(2));
}
@@ -25850,8 +25926,7 @@ static SDValue performGlobalAddressCombine(SDNode *N, SelectionDAG &DAG,
const GlobalValue *GV = GN->getGlobal();
Type *T = GV->getValueType();
- if (!T->isSized() ||
- Offset > GV->getDataLayout().getTypeAllocSize(T))
+ if (!T->isSized() || Offset > GV->getDataLayout().getTypeAllocSize(T))
return SDValue();
SDLoc DL(GN);
@@ -26147,8 +26222,8 @@ performSignExtendInRegCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
// Sign extend of an unsigned unpack -> signed unpack
if (Opc == AArch64ISD::UUNPKHI || Opc == AArch64ISD::UUNPKLO) {
- unsigned SOpc = Opc == AArch64ISD::UUNPKHI ? AArch64ISD::SUNPKHI
- : AArch64ISD::SUNPKLO;
+ unsigned SOpc =
+ Opc == AArch64ISD::UUNPKHI ? AArch64ISD::SUNPKHI : AArch64ISD::SUNPKLO;
// Push the sign extend to the operand of the unpack
// This is necessary where, for example, the operand of the unpack
@@ -27453,12 +27528,12 @@ static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V) {
SDLoc DL(V.getNode());
auto [VLo, VHi] = DAG.SplitScalar(V, DL, MVT::i64, MVT::i64);
if (DAG.getDataLayout().isBigEndian())
- std::swap (VLo, VHi);
+ std::swap(VLo, VHi);
SDValue RegClass =
DAG.getTargetConstant(AArch64::XSeqPairsClassRegClassID, DL, MVT::i32);
SDValue SubReg0 = DAG.getTargetConstant(AArch64::sube64, DL, MVT::i32);
SDValue SubReg1 = DAG.getTargetConstant(AArch64::subo64, DL, MVT::i32);
- const SDValue Ops[] = { RegClass, VLo, SubReg0, VHi, SubReg1 };
+ const SDValue Ops[] = {RegClass, VLo, SubReg0, VHi, SubReg1};
return SDValue(
DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops), 0);
}
@@ -27477,8 +27552,8 @@ static void ReplaceCMP_SWAP_128Results(SDNode *N,
SDValue Ops[] = {
createGPRPairNode(DAG, N->getOperand(2)), // Compare value
createGPRPairNode(DAG, N->getOperand(3)), // Store value
- N->getOperand(1), // Ptr
- N->getOperand(0), // Chain in
+ N->getOperand(1), // Ptr
+ N->getOperand(0), // Chain in
};
unsigned Opcode;
@@ -27797,8 +27872,8 @@ void AArch64TargetLowering::ReplaceNodeResults(
if ((!LoadNode->isVolatile() && !LoadNode->isAtomic()) ||
LoadNode->getMemoryVT() != MVT::i128) {
- // Non-volatile or atomic loads are optimized later in AArch64's load/store
- // optimizer.
+ // Non-volatile or atomic loads are optimized later in AArch64's
+ // load/store optimizer.
return;
}
@@ -27850,8 +27925,8 @@ void AArch64TargetLowering::ReplaceNodeResults(
"custom lowering for unexpected type");
SDLoc DL(N);
auto Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, N->getOperand(2));
- auto V = DAG.getNode(AArch64ISD::CLASTA_N, DL, MVT::i32,
- N->getOperand(1), Op2, N->getOperand(3));
+ auto V = DAG.getNode(AArch64ISD::CLASTA_N, DL, MVT::i32, N->getOperand(1),
+ Op2, N->getOperand(3));
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
return;
}
@@ -27860,8 +27935,8 @@ void AArch64TargetLowering::ReplaceNodeResults(
"custom lowering for unexpected type");
SDLoc DL(N);
auto Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, N->getOperand(2));
- auto V = DAG.getNode(AArch64ISD::CLASTB_N, DL, MVT::i32,
- N->getOperand(1), Op2, N->getOperand(3));
+ auto V = DAG.getNode(AArch64ISD::CLASTB_N, DL, MVT::i32, N->getOperand(1),
+ Op2, N->getOperand(3));
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
return;
}
@@ -27869,8 +27944,8 @@ void AArch64TargetLowering::ReplaceNodeResults(
assert((VT == MVT::i8 || VT == MVT::i16) &&
"custom lowering for unexpected type");
SDLoc DL(N);
- auto V = DAG.getNode(AArch64ISD::LASTA, DL, MVT::i32,
- N->getOperand(1), N->getOperand(2));
+ auto V = DAG.getNode(AArch64ISD::LASTA, DL, MVT::i32, N->getOperand(1),
+ N->getOperand(2));
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
return;
}
@@ -27878,8 +27953,8 @@ void AArch64TargetLowering::ReplaceNodeResults(
assert((VT == MVT::i8 || VT == MVT::i16) &&
"custom lowering for unexpected type");
SDLoc DL(N);
- auto V = DAG.getNode(AArch64ISD::LASTB, DL, MVT::i32,
- N->getOperand(1), N->getOperand(2));
+ auto V = DAG.getNode(AArch64ISD::LASTB, DL, MVT::i32, N->getOperand(1),
+ N->getOperand(2));
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
return;
}
@@ -28222,7 +28297,7 @@ Value *AArch64TargetLowering::emitLoadLinked(IRBuilderBase &Builder,
return Builder.CreateBitCast(Or, ValueTy);
}
- Type *Tys[] = { Addr->getType() };
+ Type *Tys[] = {Addr->getType()};
Intrinsic::ID Int =
IsAcquire ? Intrinsic::aarch64_ldaxr : Intrinsic::aarch64_ldxr;
@@ -28267,11 +28342,12 @@ Value *AArch64TargetLowering::emitStoreConditional(IRBuilderBase &Builder,
Intrinsic::ID Int =
IsRelease ? Intrinsic::aarch64_stlxr : Intrinsic::aarch64_stxr;
- Type *Tys[] = { Addr->getType() };
+ Type *Tys[] = {Addr->getType()};
Function *Stxr = Intrinsic::getOrInsertDeclaration(M, Int, Tys);
const DataLayout &DL = M->getDataLayout();
- IntegerType *IntValTy = Builder.getIntNTy(DL.getTypeSizeInBits(Val->getType()));
+ IntegerType *IntValTy =
+ Builder.getIntNTy(DL.getTypeSizeInBits(Val->getType()));
Val = Builder.CreateBitCast(Val, IntValTy);
CallInst *CI = Builder.CreateCall(
@@ -28401,7 +28477,7 @@ bool AArch64TargetLowering::isMaskAndCmp0FoldingBeneficial(
// may be beneficial to sink in other cases, but we would have to check that
// the cmp would not get folded into the br to form a cbz for these to be
// beneficial.
- ConstantInt* Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
+ ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
if (!Mask)
return false;
return Mask->getValue().isPowerOf2();
@@ -28462,9 +28538,9 @@ void AArch64TargetLowering::insertCopiesSplitCSR(
// fine for CXX_FAST_TLS since the C++-style TLS access functions should be
// nounwind. If we want to generalize this later, we may need to emit
// CFI pseudo-instructions.
- assert(Entry->getParent()->getFunction().hasFnAttribute(
- Attribute::NoUnwind) &&
- "Function should be nounwind in insertCopiesSplitCSR!");
+ assert(
+ Entry->getParent()->getFunction().hasFnAttribute(Attribute::NoUnwind) &&
+ "Function should be nounwind in insertCopiesSplitCSR!");
Entry->addLiveIn(*I);
BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
.addReg(*I);
@@ -29202,9 +29278,9 @@ SDValue AArch64TargetLowering::LowerToPredicatedOp(SDValue Op,
SmallVector<SDValue, 4> Operands = {Pg};
for (const SDValue &V : Op->op_values()) {
- assert((!V.getValueType().isVector() ||
- V.getValueType().isScalableVector()) &&
- "Only scalable vectors are supported!");
+ assert(
+ (!V.getValueType().isVector() || V.getValueType().isScalableVector()) &&
+ "Only scalable vectors are supported!");
Operands.push_back(V);
}
@@ -29246,8 +29322,9 @@ SDValue AArch64TargetLowering::LowerToScalableOp(SDValue Op,
return convertFromScalableVector(DAG, VT, ScalableRes);
}
-SDValue AArch64TargetLowering::LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp,
- SelectionDAG &DAG) const {
+SDValue
+AArch64TargetLowering::LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp,
+ SelectionDAG &DAG) const {
SDLoc DL(ScalarOp);
SDValue AccOp = ScalarOp.getOperand(0);
SDValue VecOp = ScalarOp.getOperand(1);
@@ -29268,14 +29345,15 @@ SDValue AArch64TargetLowering::LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp,
DAG.getUNDEF(ContainerVT), AccOp, Zero);
// Perform reduction.
- SDValue Rdx = DAG.getNode(AArch64ISD::FADDA_PRED, DL, ContainerVT,
- Pg, AccOp, VecOp);
+ SDValue Rdx =
+ DAG.getNode(AArch64ISD::FADDA_PRED, DL, ContainerVT, Pg, AccOp, VecOp);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Rdx, Zero);
}
-SDValue AArch64TargetLowering::LowerPredReductionToSVE(SDValue ReduceOp,
- SelectionDAG &DAG) const {
+SDValue
+AArch64TargetLowering::LowerPredReductionToSVE(SDValue ReduceOp,
+ SelectionDAG &DAG) const {
SDLoc DL(ReduceOp);
SDValue Op = ReduceOp.getOperand(0);
EVT OpVT = Op.getValueType();
@@ -29346,16 +29424,16 @@ SDValue AArch64TargetLowering::LowerReductionToSVE(unsigned Opcode,
}
// UADDV always returns an i64 result.
- EVT ResVT = (Opcode == AArch64ISD::UADDV_PRED) ? MVT::i64 :
- SrcVT.getVectorElementType();
+ EVT ResVT = (Opcode == AArch64ISD::UADDV_PRED) ? MVT::i64
+ : SrcVT.getVectorElementType();
EVT RdxVT = SrcVT;
if (SrcVT.isFixedLengthVector() || Opcode == AArch64ISD::UADDV_PRED)
RdxVT = getPackedSVEVectorVT(ResVT);
SDValue Pg = getPredicateForVector(DAG, DL, SrcVT);
SDValue Rdx = DAG.getNode(Opcode, DL, RdxVT, Pg, VecOp);
- SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT,
- Rdx, DAG.getConstant(0, DL, MVT::i64));
+ SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Rdx,
+ DAG.getConstant(0, DL, MVT::i64));
// The VEC_REDUCE nodes expect an element size result.
if (ResVT != ScalarOp.getValueType())
@@ -29364,9 +29442,8 @@ SDValue AArch64TargetLowering::LowerReductionToSVE(unsigned Opcode,
return Res;
}
-SDValue
-AArch64TargetLowering::LowerFixedLengthVectorSelectToSVE(SDValue Op,
- SelectionDAG &DAG) const {
+SDValue AArch64TargetLowering::LowerFixedLengthVectorSelectToSVE(
+ SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
SDLoc DL(Op);
@@ -29383,8 +29460,7 @@ AArch64TargetLowering::LowerFixedLengthVectorSelectToSVE(SDValue Op,
Mask = DAG.getNode(ISD::TRUNCATE, DL,
MaskContainerVT.changeVectorElementType(MVT::i1), Mask);
- auto ScalableRes = DAG.getNode(ISD::VSELECT, DL, ContainerVT,
- Mask, Op1, Op2);
+ auto ScalableRes = DAG.getNode(ISD::VSELECT, DL, ContainerVT, Mask, Op1, Op2);
return convertFromScalableVector(DAG, VT, ScalableRes);
}
@@ -29483,16 +29559,16 @@ AArch64TargetLowering::LowerFixedLengthFPExtendToSVE(SDValue Op,
SDValue Pg = getPredicateForVector(DAG, DL, VT);
EVT SrcVT = Val.getValueType();
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
- EVT ExtendVT = ContainerVT.changeVectorElementType(
- SrcVT.getVectorElementType());
+ EVT ExtendVT =
+ ContainerVT.changeVectorElementType(SrcVT.getVectorElementType());
Val = DAG.getNode(ISD::BITCAST, DL, SrcVT.changeTypeToInteger(), Val);
Val = DAG.getNode(ISD::ANY_EXTEND, DL, VT.changeTypeToInteger(), Val);
Val = convertToScalableVector(DAG, ContainerVT.changeTypeToInteger(), Val);
Val = getSVESafeBitCast(ExtendVT, Val, DAG);
- Val = DAG.getNode(AArch64ISD::FP_EXTEND_MERGE_PASSTHRU, DL, ContainerVT,
- Pg, Val, DAG.getUNDEF(ContainerVT));
+ Val = DAG.getNode(AArch64ISD::FP_EXTEND_MERGE_PASSTHRU, DL, ContainerVT, Pg,
+ Val, DAG.getUNDEF(ContainerVT));
return convertFromScalableVector(DAG, VT, Val);
}
@@ -29507,8 +29583,8 @@ AArch64TargetLowering::LowerFixedLengthFPRoundToSVE(SDValue Op,
SDValue Val = Op.getOperand(0);
EVT SrcVT = Val.getValueType();
EVT ContainerSrcVT = getContainerForFixedLengthVector(DAG, SrcVT);
- EVT RoundVT = ContainerSrcVT.changeVectorElementType(
- VT.getVectorElementType());
+ EVT RoundVT =
+ ContainerSrcVT.changeVectorElementType(VT.getVectorElementType());
SDValue Pg = getPredicateForVector(DAG, DL, RoundVT);
Val = convertToScalableVector(DAG, ContainerSrcVT, Val);
@@ -29822,7 +29898,7 @@ AArch64TargetLowering::LowerFixedLengthFPToIntToSVE(SDValue Op,
if (VT.bitsGT(SrcVT)) {
EVT CvtVT = ContainerDstVT.changeVectorElementType(
- ContainerSrcVT.getVectorElementType());
+ ContainerSrcVT.getVectorElementType());
SDValue Pg = getPredicateForFixedLengthVector(DAG, DL, VT);
Val = DAG.getNode(ISD::BITCAST, DL, SrcVT.changeTypeToInteger(), Val);
@@ -30145,8 +30221,8 @@ SDValue AArch64TargetLowering::getSVESafeBitCast(EVT VT, SDValue Op,
SDLoc DL(Op);
EVT InVT = Op.getValueType();
- assert(VT.isScalableVector() && isTypeLegal(VT) &&
- InVT.isScalableVector() && isTypeLegal(InVT) &&
+ assert(VT.isScalableVector() && isTypeLegal(VT) && InVT.isScalableVector() &&
+ isTypeLegal(InVT) &&
"Only expect to cast between legal scalable vector types!");
assert(VT.getVectorElementType() != MVT::i1 &&
InVT.getVectorElementType() != MVT::i1 &&
@@ -30285,9 +30361,12 @@ bool AArch64TargetLowering::SimplifyDemandedBitsForTargetNode(
}
bool AArch64TargetLowering::isTargetCanonicalConstantNode(SDValue Op) const {
- return Op.getOpcode() == AArch64ISD::DUP ||
- Op.getOpcode() == AArch64ISD::MOVI ||
- (Op.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ unsigned Opc = Op.getOpcode();
+ return Opc == AArch64ISD::DUP || Opc == AArch64ISD::MOVI ||
+ Opc == AArch64ISD::MOVIshift || Opc == AArch64ISD::MOVIedit ||
+ Opc == AArch64ISD::MOVImsl || Opc == AArch64ISD::MVNIshift ||
+ Opc == AArch64ISD::MVNImsl ||
+ (Opc == ISD::EXTRACT_SUBVECTOR &&
Op.getOperand(0).getOpcode() == AArch64ISD::DUP) ||
TargetLowering::isTargetCanonicalConstantNode(Op);
}
@@ -30399,7 +30478,6 @@ Value *AArch64TargetLowering::createComplexDeinterleavingIR(
Intrinsic::aarch64_neon_vcmla_rot180,
Intrinsic::aarch64_neon_vcmla_rot270};
-
return B.CreateIntrinsic(IdMap[(int)Rotation], Ty,
{Accumulator, InputA, InputB});
}
diff --git a/llvm/test/CodeGen/AArch64/movi-custom.ll b/llvm/test/CodeGen/AArch64/movi-custom.ll
new file mode 100644
index 0000000000000..14c8fd5c994d9
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/movi-custom.ll
@@ -0,0 +1,30 @@
+; RUN: llc -mtriple=aarch64-linux-gnu < %s -o - | FileCheck %s
+
+; Test 1: AArch64ISD::MOVI (per-byte immediate)
+define <16 x i8> @test_movi_edit() {
+; CHECK-LABEL: test_movi_edit:
+; CHECK: movi v0.16b, #63
+ ret <16 x i8> <i8 63, i8 63, i8 63, i8 63, i8 63, i8 63, i8 63, i8 63,
+ i8 63, i8 63, i8 63, i8 63, i8 63, i8 63, i8 63, i8 63>
+}
+
+; Test 2: AArch64ISD::MOVIshift (32-bit immediate, LSL #0)
+define <4 x i32> @test_movi_msl() {
+; CHECK-LABEL: test_movi_msl:
+; CHECK: movi v0.4s, #64
+ ret <4 x i32> <i32 64, i32 64, i32 64, i32 64>
+}
+
+; Test 3: splat of 0xffff00ff; the 64-bit byte-mask form (AArch64ISD::MOVIedit) is selected rather than MVNI
+define <4 x i32> @test_mvni_shift() {
+; CHECK-LABEL: test_mvni_shift:
+; CHECK: movi v0.2d, #0xffff00ffffff00ff
+ ret <4 x i32> <i32 -65281, i32 -65281, i32 -65281, i32 -65281>
+}
+
+; Test 4: AArch64ISD::MVNIshift (32-bit inverted immediate, LSL #0)
+define <4 x i32> @test_mvnimsl() {
+; CHECK-LABEL: test_mvnimsl:
+; CHECK: mvni v0.4s, #64
+ ret <4 x i32> <i32 -65, i32 -65, i32 -65, i32 -65>
+}
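For reference, two more splat patterns that exercise the shifted and msl immediate forms now recognized by isTargetCanonicalConstantNode could look like the sketch below. This is illustrative only and not part of the patch; the function names are made up, and the expected instructions assume the usual modified-immediate selection order (64-bit byte mask first, then the LSL forms, then the MSL forms).

; Hypothetical extra coverage (not in movi-custom.ll).
; A v8i16 splat of 0x0100 should select a shifted MOVI (AArch64ISD::MOVIshift):
;   movi v0.8h, #1, lsl #8
define <8 x i16> @sketch_movi_shift_16() {
  ret <8 x i16> <i16 256, i16 256, i16 256, i16 256,
                 i16 256, i16 256, i16 256, i16 256>
}

; A v4i32 splat of 0x0001ffff matches the shifting-ones encoding (AArch64ISD::MOVImsl):
;   movi v0.4s, #1, msl #16
define <4 x i32> @sketch_movi_msl_32() {
  ret <4 x i32> <i32 131071, i32 131071, i32 131071, i32 131071>
}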