[llvm] [AArch64] Optimized generated assembly for bool to svbool_t conversions (PR #83001)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 28 03:58:56 PST 2024
https://github.com/Lukacma updated https://github.com/llvm/llvm-project/pull/83001
>From 3c4270c28d42ac798c3674bc51f16ca30ae0320d Mon Sep 17 00:00:00 2001
From: Marian Lukac <Marian.Lukac at arm.com>
Date: Mon, 26 Feb 2024 13:03:52 +0000
Subject: [PATCH 1/4] [AArch64] Optimized generated assembly for bool to
svbool_t conversions
The original lowering generated an `AND(WHILELO, SPLAT 1)` pattern when only `WHILELO` was necessary.
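For context, a minimal C sketch of the kind of source this affects (not part of the patch; it assumes the ACLE `svdup_n_b16` intrinsic and a compiler targeting `+sve`, and that this lowers to the i1-splat-plus-`convert.to.svbool` IR exercised by the new tests, which previously produced a `WHILELO` followed by an `AND` with a splat of 1 and now produces a single `WHILELO`):

```c
#include <arm_sve.h>
#include <stdbool.h>

// Splat a scalar bool across the .h lanes of an SVE predicate.  The frontend
// is expected to emit an i1 splat followed by
// llvm.aarch64.sve.convert.to.svbool.nxv8i1 -- the pattern targeted here.
// (Sketch for illustration only, not taken from the PR.)
svbool_t bool_to_pred_h(bool x) {
  return svdup_n_b16(x);
}
```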
---
.../Target/AArch64/AArch64ISelLowering.cpp | 1 +
.../AArch64/sve-intrinsics-reinterpret.ll | 42 ++++++++++++++++++-
2 files changed, 42 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index a3b7e3128ac1a4..dba3a787734721 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -276,6 +276,7 @@ static bool isZeroingInactiveLanes(SDValue Op) {
if (ISD::isConstantSplatVectorAllOnes(Op.getNode()))
return true;
return false;
+ case ISD::SPLAT_VECTOR:
case AArch64ISD::PTRUE:
case AArch64ISD::SETCC_MERGE_ZERO:
return true;
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll
index 82bf756f822898..c7c102f5d567d9 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s
@@ -150,6 +150,46 @@ define <vscale x 16 x i1> @chained_reinterpret() {
ret <vscale x 16 x i1> %out
}
+define <vscale x 16 x i1> @reinterpret_scalar_bool_h(i1 %x){
+; CHECK-LABEL: reinterpret_scalar_bool_h:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT: sbfx x8, x0, #0, #1
+; CHECK-NEXT: whilelo p0.h, xzr, x8
+; CHECK-NEXT: ret
+ %.splatinsert = insertelement <vscale x 8 x i1> poison, i1 %x, i64 0
+ %.splat = shufflevector <vscale x 8 x i1> %.splatinsert, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %out = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %.splat)
+ ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 16 x i1> @reinterpret_scalar_bool_s(i1 %x){
+; CHECK-LABEL: reinterpret_scalar_bool_s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT: sbfx x8, x0, #0, #1
+; CHECK-NEXT: whilelo p0.s, xzr, x8
+; CHECK-NEXT: ret
+ %.splatinsert = insertelement <vscale x 4 x i1> poison, i1 %x, i64 0
+ %.splat = shufflevector <vscale x 4 x i1> %.splatinsert, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+ %out = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %.splat)
+ ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 16 x i1> @reinterpret_scalar_bool_q(i1 %x){
+; CHECK-LABEL: reinterpret_scalar_bool_q:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT: sbfx x8, x0, #0, #1
+; CHECK-NEXT: whilelo p0.d, xzr, x8
+; CHECK-NEXT: ret
+ %.splatinsert = insertelement <vscale x 2 x i1> poison, i1 %x, i64 0
+ %.splat = shufflevector <vscale x 2 x i1> %.splatinsert, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+ %out = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %.splat)
+ ret <vscale x 16 x i1> %out
+}
+
+
declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 immarg)
declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 immarg)
declare <vscale x 8 x i1> @llvm.aarch64.sve.cmpgt.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
>From 8724b327c830ec9caa6ab75f6a21f1da49fcb641 Mon Sep 17 00:00:00 2001
From: Marian Lukac <Marian.Lukac at arm.com>
Date: Wed, 28 Feb 2024 11:11:19 +0000
Subject: [PATCH 2/4] Cleaned up the default behaviour of isZeroingInactiveLanes
 and taught it to look through bitcasts
---
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index dba3a787734721..5b1be2705d3740 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -269,13 +269,14 @@ static bool isMergePassthruOpcode(unsigned Opc) {
// Returns true if inactive lanes are known to be zeroed by construction.
static bool isZeroingInactiveLanes(SDValue Op) {
+ // Skip bitcast nodes
+ while (Op->getOpcode() == ISD::BITCAST)
+ Op = Op->getOperand(0);
+
switch (Op.getOpcode()) {
default:
- // We guarantee i1 splat_vectors to zero the other lanes by
- // implementing it with ptrue and possibly a punpklo for nxv1i1.
- if (ISD::isConstantSplatVectorAllOnes(Op.getNode()))
- return true;
return false;
+ // We guarantee i1 splat_vectors to zero the other lanes
case ISD::SPLAT_VECTOR:
case AArch64ISD::PTRUE:
case AArch64ISD::SETCC_MERGE_ZERO:
>From 7d47f2b74bddee18b12aafa124c7a78e36643f38 Mon Sep 17 00:00:00 2001
From: Marian Lukac <Marian.Lukac at arm.com>
Date: Wed, 28 Feb 2024 11:50:18 +0000
Subject: [PATCH 3/4] test
---
.../Target/AArch64/AArch64ISelLowering.cpp | 1343 +++++++++--------
1 file changed, 715 insertions(+), 628 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 5b1be2705d3740..90f68a77867db5 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -120,20 +120,20 @@ cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration(
cl::init(false));
static cl::opt<bool>
-EnableOptimizeLogicalImm("aarch64-enable-logical-imm", cl::Hidden,
- cl::desc("Enable AArch64 logical imm instruction "
- "optimization"),
- cl::init(true));
+ EnableOptimizeLogicalImm("aarch64-enable-logical-imm", cl::Hidden,
+ cl::desc("Enable AArch64 logical imm instruction "
+ "optimization"),
+ cl::init(true));
// Temporary option added for the purpose of testing functionality added
// to DAGCombiner.cpp in D92230. It is expected that this can be removed
// in future when both implementations will be based off MGATHER rather
// than the GLD1 nodes added for the SVE gather load intrinsics.
static cl::opt<bool>
-EnableCombineMGatherIntrinsics("aarch64-enable-mgather-combine", cl::Hidden,
- cl::desc("Combine extends of AArch64 masked "
- "gather intrinsics"),
- cl::init(true));
+ EnableCombineMGatherIntrinsics("aarch64-enable-mgather-combine", cl::Hidden,
+ cl::desc("Combine extends of AArch64 masked "
+ "gather intrinsics"),
+ cl::init(true));
static cl::opt<bool> EnableExtToTBL("aarch64-enable-ext-to-tbl", cl::Hidden,
cl::desc("Combine ext and trunc to TBL"),
@@ -272,7 +272,7 @@ static bool isZeroingInactiveLanes(SDValue Op) {
// Skip bitcast nodes
while (Op->getOpcode() == ISD::BITCAST)
Op = Op->getOperand(0);
-
+
switch (Op.getOpcode()) {
default:
return false;
@@ -681,35 +681,59 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
else
setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote);
- for (auto Op : {ISD::FREM, ISD::FPOW, ISD::FPOWI,
- ISD::FCOS, ISD::FSIN, ISD::FSINCOS,
- ISD::FEXP, ISD::FEXP2, ISD::FEXP10,
- ISD::FLOG, ISD::FLOG2, ISD::FLOG10,
- ISD::STRICT_FREM,
- ISD::STRICT_FPOW, ISD::STRICT_FPOWI, ISD::STRICT_FCOS,
- ISD::STRICT_FSIN, ISD::STRICT_FEXP, ISD::STRICT_FEXP2,
- ISD::STRICT_FLOG, ISD::STRICT_FLOG2, ISD::STRICT_FLOG10}) {
+ for (auto Op : {ISD::FREM, ISD::FPOW, ISD::FPOWI,
+ ISD::FCOS, ISD::FSIN, ISD::FSINCOS,
+ ISD::FEXP, ISD::FEXP2, ISD::FEXP10,
+ ISD::FLOG, ISD::FLOG2, ISD::FLOG10,
+ ISD::STRICT_FREM, ISD::STRICT_FPOW, ISD::STRICT_FPOWI,
+ ISD::STRICT_FCOS, ISD::STRICT_FSIN, ISD::STRICT_FEXP,
+ ISD::STRICT_FEXP2, ISD::STRICT_FLOG, ISD::STRICT_FLOG2,
+ ISD::STRICT_FLOG10}) {
setOperationAction(Op, MVT::f16, Promote);
setOperationAction(Op, MVT::v4f16, Expand);
setOperationAction(Op, MVT::v8f16, Expand);
}
if (!Subtarget->hasFullFP16()) {
- for (auto Op :
- {ISD::SETCC, ISD::SELECT_CC,
- ISD::BR_CC, ISD::FADD, ISD::FSUB,
- ISD::FMUL, ISD::FDIV, ISD::FMA,
- ISD::FNEG, ISD::FABS, ISD::FCEIL,
- ISD::FSQRT, ISD::FFLOOR, ISD::FNEARBYINT,
- ISD::FRINT, ISD::FROUND, ISD::FROUNDEVEN,
- ISD::FTRUNC, ISD::FMINNUM, ISD::FMAXNUM,
- ISD::FMINIMUM, ISD::FMAXIMUM, ISD::STRICT_FADD,
- ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV,
- ISD::STRICT_FMA, ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR,
- ISD::STRICT_FSQRT, ISD::STRICT_FRINT, ISD::STRICT_FNEARBYINT,
- ISD::STRICT_FROUND, ISD::STRICT_FTRUNC, ISD::STRICT_FROUNDEVEN,
- ISD::STRICT_FMINNUM, ISD::STRICT_FMAXNUM, ISD::STRICT_FMINIMUM,
- ISD::STRICT_FMAXIMUM})
+ for (auto Op : {ISD::SETCC,
+ ISD::SELECT_CC,
+ ISD::BR_CC,
+ ISD::FADD,
+ ISD::FSUB,
+ ISD::FMUL,
+ ISD::FDIV,
+ ISD::FMA,
+ ISD::FNEG,
+ ISD::FABS,
+ ISD::FCEIL,
+ ISD::FSQRT,
+ ISD::FFLOOR,
+ ISD::FNEARBYINT,
+ ISD::FRINT,
+ ISD::FROUND,
+ ISD::FROUNDEVEN,
+ ISD::FTRUNC,
+ ISD::FMINNUM,
+ ISD::FMAXNUM,
+ ISD::FMINIMUM,
+ ISD::FMAXIMUM,
+ ISD::STRICT_FADD,
+ ISD::STRICT_FSUB,
+ ISD::STRICT_FMUL,
+ ISD::STRICT_FDIV,
+ ISD::STRICT_FMA,
+ ISD::STRICT_FCEIL,
+ ISD::STRICT_FFLOOR,
+ ISD::STRICT_FSQRT,
+ ISD::STRICT_FRINT,
+ ISD::STRICT_FNEARBYINT,
+ ISD::STRICT_FROUND,
+ ISD::STRICT_FTRUNC,
+ ISD::STRICT_FROUNDEVEN,
+ ISD::STRICT_FMINNUM,
+ ISD::STRICT_FMAXNUM,
+ ISD::STRICT_FMINIMUM,
+ ISD::STRICT_FMAXIMUM})
setOperationAction(Op, MVT::f16, Promote);
// Round-to-integer need custom lowering for fp16, as Promote doesn't work
@@ -725,44 +749,44 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationPromotedToType(ISD::FMUL, MVT::v4f16, MVT::v4f32);
setOperationPromotedToType(ISD::FDIV, MVT::v4f16, MVT::v4f32);
- setOperationAction(ISD::FABS, MVT::v4f16, Expand);
- setOperationAction(ISD::FNEG, MVT::v4f16, Expand);
- setOperationAction(ISD::FROUND, MVT::v4f16, Expand);
- setOperationAction(ISD::FROUNDEVEN, MVT::v4f16, Expand);
- setOperationAction(ISD::FMA, MVT::v4f16, Expand);
- setOperationAction(ISD::SETCC, MVT::v4f16, Custom);
- setOperationAction(ISD::BR_CC, MVT::v4f16, Expand);
- setOperationAction(ISD::SELECT, MVT::v4f16, Expand);
- setOperationAction(ISD::SELECT_CC, MVT::v4f16, Expand);
- setOperationAction(ISD::FTRUNC, MVT::v4f16, Expand);
- setOperationAction(ISD::FCOPYSIGN, MVT::v4f16, Expand);
- setOperationAction(ISD::FFLOOR, MVT::v4f16, Expand);
- setOperationAction(ISD::FCEIL, MVT::v4f16, Expand);
- setOperationAction(ISD::FRINT, MVT::v4f16, Expand);
- setOperationAction(ISD::FNEARBYINT, MVT::v4f16, Expand);
- setOperationAction(ISD::FSQRT, MVT::v4f16, Expand);
-
- setOperationAction(ISD::FABS, MVT::v8f16, Expand);
- setOperationAction(ISD::FADD, MVT::v8f16, Expand);
- setOperationAction(ISD::FCEIL, MVT::v8f16, Expand);
- setOperationAction(ISD::FCOPYSIGN, MVT::v8f16, Expand);
- setOperationAction(ISD::FDIV, MVT::v8f16, Expand);
- setOperationAction(ISD::FFLOOR, MVT::v8f16, Expand);
- setOperationAction(ISD::FMA, MVT::v8f16, Expand);
- setOperationAction(ISD::FMUL, MVT::v8f16, Expand);
- setOperationAction(ISD::FNEARBYINT, MVT::v8f16, Expand);
- setOperationAction(ISD::FNEG, MVT::v8f16, Expand);
- setOperationAction(ISD::FROUND, MVT::v8f16, Expand);
- setOperationAction(ISD::FROUNDEVEN, MVT::v8f16, Expand);
- setOperationAction(ISD::FRINT, MVT::v8f16, Expand);
- setOperationAction(ISD::FSQRT, MVT::v8f16, Expand);
- setOperationAction(ISD::FSUB, MVT::v8f16, Expand);
- setOperationAction(ISD::FTRUNC, MVT::v8f16, Expand);
- setOperationAction(ISD::SETCC, MVT::v8f16, Expand);
- setOperationAction(ISD::BR_CC, MVT::v8f16, Expand);
- setOperationAction(ISD::SELECT, MVT::v8f16, Expand);
- setOperationAction(ISD::SELECT_CC, MVT::v8f16, Expand);
- setOperationAction(ISD::FP_EXTEND, MVT::v8f16, Expand);
+ setOperationAction(ISD::FABS, MVT::v4f16, Expand);
+ setOperationAction(ISD::FNEG, MVT::v4f16, Expand);
+ setOperationAction(ISD::FROUND, MVT::v4f16, Expand);
+ setOperationAction(ISD::FROUNDEVEN, MVT::v4f16, Expand);
+ setOperationAction(ISD::FMA, MVT::v4f16, Expand);
+ setOperationAction(ISD::SETCC, MVT::v4f16, Custom);
+ setOperationAction(ISD::BR_CC, MVT::v4f16, Expand);
+ setOperationAction(ISD::SELECT, MVT::v4f16, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::v4f16, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::v4f16, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::v4f16, Expand);
+ setOperationAction(ISD::FFLOOR, MVT::v4f16, Expand);
+ setOperationAction(ISD::FCEIL, MVT::v4f16, Expand);
+ setOperationAction(ISD::FRINT, MVT::v4f16, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::v4f16, Expand);
+ setOperationAction(ISD::FSQRT, MVT::v4f16, Expand);
+
+ setOperationAction(ISD::FABS, MVT::v8f16, Expand);
+ setOperationAction(ISD::FADD, MVT::v8f16, Expand);
+ setOperationAction(ISD::FCEIL, MVT::v8f16, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::v8f16, Expand);
+ setOperationAction(ISD::FDIV, MVT::v8f16, Expand);
+ setOperationAction(ISD::FFLOOR, MVT::v8f16, Expand);
+ setOperationAction(ISD::FMA, MVT::v8f16, Expand);
+ setOperationAction(ISD::FMUL, MVT::v8f16, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::v8f16, Expand);
+ setOperationAction(ISD::FNEG, MVT::v8f16, Expand);
+ setOperationAction(ISD::FROUND, MVT::v8f16, Expand);
+ setOperationAction(ISD::FROUNDEVEN, MVT::v8f16, Expand);
+ setOperationAction(ISD::FRINT, MVT::v8f16, Expand);
+ setOperationAction(ISD::FSQRT, MVT::v8f16, Expand);
+ setOperationAction(ISD::FSUB, MVT::v8f16, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::v8f16, Expand);
+ setOperationAction(ISD::SETCC, MVT::v8f16, Expand);
+ setOperationAction(ISD::BR_CC, MVT::v8f16, Expand);
+ setOperationAction(ISD::SELECT, MVT::v8f16, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::v8f16, Expand);
+ setOperationAction(ISD::FP_EXTEND, MVT::v8f16, Expand);
}
// AArch64 has implementations of a lot of rounding-like FP operations.
@@ -1078,21 +1102,45 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
if (Subtarget->hasNEON()) {
// FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to
// silliness like this:
- for (auto Op :
- {ISD::SELECT, ISD::SELECT_CC,
- ISD::BR_CC, ISD::FADD, ISD::FSUB,
- ISD::FMUL, ISD::FDIV, ISD::FMA,
- ISD::FNEG, ISD::FABS, ISD::FCEIL,
- ISD::FSQRT, ISD::FFLOOR, ISD::FNEARBYINT,
- ISD::FRINT, ISD::FROUND, ISD::FROUNDEVEN,
- ISD::FTRUNC, ISD::FMINNUM, ISD::FMAXNUM,
- ISD::FMINIMUM, ISD::FMAXIMUM, ISD::STRICT_FADD,
- ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV,
- ISD::STRICT_FMA, ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR,
- ISD::STRICT_FSQRT, ISD::STRICT_FRINT, ISD::STRICT_FNEARBYINT,
- ISD::STRICT_FROUND, ISD::STRICT_FTRUNC, ISD::STRICT_FROUNDEVEN,
- ISD::STRICT_FMINNUM, ISD::STRICT_FMAXNUM, ISD::STRICT_FMINIMUM,
- ISD::STRICT_FMAXIMUM})
+ for (auto Op : {ISD::SELECT,
+ ISD::SELECT_CC,
+ ISD::BR_CC,
+ ISD::FADD,
+ ISD::FSUB,
+ ISD::FMUL,
+ ISD::FDIV,
+ ISD::FMA,
+ ISD::FNEG,
+ ISD::FABS,
+ ISD::FCEIL,
+ ISD::FSQRT,
+ ISD::FFLOOR,
+ ISD::FNEARBYINT,
+ ISD::FRINT,
+ ISD::FROUND,
+ ISD::FROUNDEVEN,
+ ISD::FTRUNC,
+ ISD::FMINNUM,
+ ISD::FMAXNUM,
+ ISD::FMINIMUM,
+ ISD::FMAXIMUM,
+ ISD::STRICT_FADD,
+ ISD::STRICT_FSUB,
+ ISD::STRICT_FMUL,
+ ISD::STRICT_FDIV,
+ ISD::STRICT_FMA,
+ ISD::STRICT_FCEIL,
+ ISD::STRICT_FFLOOR,
+ ISD::STRICT_FSQRT,
+ ISD::STRICT_FRINT,
+ ISD::STRICT_FNEARBYINT,
+ ISD::STRICT_FROUND,
+ ISD::STRICT_FTRUNC,
+ ISD::STRICT_FROUNDEVEN,
+ ISD::STRICT_FMINNUM,
+ ISD::STRICT_FMAXNUM,
+ ISD::STRICT_FMINIMUM,
+ ISD::STRICT_FMAXIMUM})
setOperationAction(Op, MVT::v1f64, Expand);
for (auto Op :
@@ -1140,8 +1188,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i16, MVT::v8i32);
}
- setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
- setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
+ setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
+ setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
setOperationAction(ISD::BITREVERSE, MVT::v8i8, Legal);
setOperationAction(ISD::BITREVERSE, MVT::v16i8, Legal);
setOperationAction(ISD::BITREVERSE, MVT::v2i32, Custom);
@@ -1164,8 +1212,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::MUL, MVT::v1i64, Custom);
// Saturates
- for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
- MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
+ for (MVT VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v16i8, MVT::v8i16,
+ MVT::v4i32, MVT::v2i64}) {
setOperationAction(ISD::SADDSAT, VT, Legal);
setOperationAction(ISD::UADDSAT, VT, Legal);
setOperationAction(ISD::SSUBSAT, VT, Legal);
@@ -1183,8 +1231,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
}
// Vector reductions
- for (MVT VT : { MVT::v4f16, MVT::v2f32,
- MVT::v8f16, MVT::v4f32, MVT::v2f64 }) {
+ for (MVT VT :
+ {MVT::v4f16, MVT::v2f32, MVT::v8f16, MVT::v4f32, MVT::v2f64}) {
if (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()) {
setOperationAction(ISD::VECREDUCE_FMAX, VT, Legal);
setOperationAction(ISD::VECREDUCE_FMIN, VT, Legal);
@@ -1194,8 +1242,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VECREDUCE_FADD, VT, Legal);
}
}
- for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
- MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
+ for (MVT VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v16i8, MVT::v8i16,
+ MVT::v4i32}) {
setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
@@ -1262,18 +1310,18 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BITCAST, MVT::v2i16, Custom);
setOperationAction(ISD::BITCAST, MVT::v4i8, Custom);
- setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, MVT::v4i8, Custom);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, MVT::v4i8, Custom);
setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, MVT::v4i8, Custom);
setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, MVT::v4i8, Custom);
- setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i8, Custom);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i8, Custom);
setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i8, Custom);
setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i8, Custom);
// ADDP custom lowering
- for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
+ for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64})
setOperationAction(ISD::ADD, VT, Custom);
// FADDP custom lowering
- for (MVT VT : { MVT::v16f16, MVT::v8f32, MVT::v4f64 })
+ for (MVT VT : {MVT::v16f16, MVT::v8f32, MVT::v4f64})
setOperationAction(ISD::FADD, VT, Custom);
}
@@ -1378,8 +1426,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BITCAST, VT, Custom);
for (auto VT :
- { MVT::nxv2i8, MVT::nxv2i16, MVT::nxv2i32, MVT::nxv2i64, MVT::nxv4i8,
- MVT::nxv4i16, MVT::nxv4i32, MVT::nxv8i8, MVT::nxv8i16 })
+ {MVT::nxv2i8, MVT::nxv2i16, MVT::nxv2i32, MVT::nxv2i64, MVT::nxv4i8,
+ MVT::nxv4i16, MVT::nxv4i32, MVT::nxv8i8, MVT::nxv8i16})
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Legal);
for (auto VT :
@@ -1565,11 +1613,11 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setTruncStoreAction(MVT::v4f64, MVT::v4f32, Custom);
for (MVT VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
MVT::v4i32, MVT::v1i64, MVT::v2i64})
- addTypeForFixedLengthSVE(VT, /*StreamingSVE=*/ true);
+ addTypeForFixedLengthSVE(VT, /*StreamingSVE=*/true);
for (MVT VT :
{MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v2f64})
- addTypeForFixedLengthSVE(VT, /*StreamingSVE=*/ true);
+ addTypeForFixedLengthSVE(VT, /*StreamingSVE=*/true);
}
// NOTE: Currently this has to happen after computeRegisterProperties rather
@@ -1577,10 +1625,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
if (Subtarget->useSVEForFixedLengthVectors()) {
for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
if (useSVEForFixedLengthVectorVT(VT))
- addTypeForFixedLengthSVE(VT, /*StreamingSVE=*/ false);
+ addTypeForFixedLengthSVE(VT, /*StreamingSVE=*/false);
for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
if (useSVEForFixedLengthVectorVT(VT))
- addTypeForFixedLengthSVE(VT, /*StreamingSVE=*/ false);
+ addTypeForFixedLengthSVE(VT, /*StreamingSVE=*/false);
// 64bit results can mean a bigger than NEON input.
for (auto VT : {MVT::v8i8, MVT::v4i16})
@@ -1616,8 +1664,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VECREDUCE_UMIN, MVT::v2i64, Custom);
// Int operations with no NEON support.
- for (auto VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
- MVT::v2i32, MVT::v4i32, MVT::v2i64}) {
+ for (auto VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
+ MVT::v4i32, MVT::v2i64}) {
setOperationAction(ISD::BITREVERSE, VT, Custom);
setOperationAction(ISD::CTTZ, VT, Custom);
setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
@@ -1627,7 +1675,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::MULHU, VT, Custom);
}
-
// Use SVE for vectors with more than 2 elements.
for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v4f32})
setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
@@ -1768,8 +1815,7 @@ void AArch64TargetLowering::addTypeForNEON(MVT VT) {
// F[MIN|MAX][NUM|NAN] and simple strict operations are available for all FP
// NEON types.
- if (VT.isFloatingPoint() &&
- VT.getVectorElementType() != MVT::bf16 &&
+ if (VT.isFloatingPoint() && VT.getVectorElementType() != MVT::bf16 &&
(VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()))
for (unsigned Opcode :
{ISD::FMINIMUM, ISD::FMAXIMUM, ISD::FMINNUM, ISD::FMAXNUM,
@@ -2093,8 +2139,8 @@ static bool optimizeLogicalImm(SDValue Op, unsigned Size, uint64_t Imm,
if (NewImm == 0 || NewImm == OrigMask) {
New = TLO.DAG.getNode(Op.getOpcode(), DL, VT, Op.getOperand(0),
TLO.DAG.getConstant(NewImm, DL, VT));
- // Otherwise, create a machine node so that target independent DAG combine
- // doesn't undo this optimization.
+ // Otherwise, create a machine node so that target independent DAG combine
+ // doesn't undo this optimization.
} else {
Enc = AArch64_AM::encodeLogicalImmediate(NewImm, Size);
SDValue EncConst = TLO.DAG.getTargetConstant(Enc, DL, VT);
@@ -2224,7 +2270,8 @@ void AArch64TargetLowering::computeKnownBitsForTargetNode(
Intrinsic::ID IntID =
static_cast<Intrinsic::ID>(Op->getConstantOperandVal(1));
switch (IntID) {
- default: return;
+ default:
+ return;
case Intrinsic::aarch64_ldaxr:
case Intrinsic::aarch64_ldxr: {
unsigned BitWidth = Known.getBitWidth();
@@ -2246,7 +2293,7 @@ void AArch64TargetLowering::computeKnownBitsForTargetNode(
MVT VT = Op.getOperand(1).getValueType().getSimpleVT();
unsigned BitWidth = Known.getBitWidth();
if (VT == MVT::v8i8 || VT == MVT::v16i8) {
- unsigned Bound = (VT == MVT::v8i8) ? 11 : 12;
+ unsigned Bound = (VT == MVT::v8i8) ? 11 : 12;
assert(BitWidth >= Bound && "Unexpected width!");
APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - Bound);
Known.Zero |= Mask;
@@ -2284,26 +2331,26 @@ unsigned AArch64TargetLowering::ComputeNumSignBitsForTargetNode(
unsigned VTBits = VT.getScalarSizeInBits();
unsigned Opcode = Op.getOpcode();
switch (Opcode) {
- case AArch64ISD::CMEQ:
- case AArch64ISD::CMGE:
- case AArch64ISD::CMGT:
- case AArch64ISD::CMHI:
- case AArch64ISD::CMHS:
- case AArch64ISD::FCMEQ:
- case AArch64ISD::FCMGE:
- case AArch64ISD::FCMGT:
- case AArch64ISD::CMEQz:
- case AArch64ISD::CMGEz:
- case AArch64ISD::CMGTz:
- case AArch64ISD::CMLEz:
- case AArch64ISD::CMLTz:
- case AArch64ISD::FCMEQz:
- case AArch64ISD::FCMGEz:
- case AArch64ISD::FCMGTz:
- case AArch64ISD::FCMLEz:
- case AArch64ISD::FCMLTz:
- // Compares return either 0 or all-ones
- return VTBits;
+ case AArch64ISD::CMEQ:
+ case AArch64ISD::CMGE:
+ case AArch64ISD::CMGT:
+ case AArch64ISD::CMHI:
+ case AArch64ISD::CMHS:
+ case AArch64ISD::FCMEQ:
+ case AArch64ISD::FCMGE:
+ case AArch64ISD::FCMGT:
+ case AArch64ISD::CMEQz:
+ case AArch64ISD::CMGEz:
+ case AArch64ISD::CMGTz:
+ case AArch64ISD::CMLEz:
+ case AArch64ISD::CMLTz:
+ case AArch64ISD::FCMEQz:
+ case AArch64ISD::FCMGEz:
+ case AArch64ISD::FCMGTz:
+ case AArch64ISD::FCMLEz:
+ case AArch64ISD::FCMLTz:
+ // Compares return either 0 or all-ones
+ return VTBits;
}
return 1;
@@ -2758,8 +2805,9 @@ AArch64TargetLowering::EmitF128CSEL(MachineInstr &MI,
return EndBB;
}
-MachineBasicBlock *AArch64TargetLowering::EmitLoweredCatchRet(
- MachineInstr &MI, MachineBasicBlock *BB) const {
+MachineBasicBlock *
+AArch64TargetLowering::EmitLoweredCatchRet(MachineInstr &MI,
+ MachineBasicBlock *BB) const {
assert(!isAsynchronousEHPersonality(classifyEHPersonality(
BB->getParent()->getFunction().getPersonalityFn())) &&
"SEH does not use catchret!");
@@ -2832,10 +2880,11 @@ MachineBasicBlock *AArch64TargetLowering::EmitZTInstr(MachineInstr &MI,
return BB;
}
-MachineBasicBlock *
-AArch64TargetLowering::EmitZAInstr(unsigned Opc, unsigned BaseReg,
- MachineInstr &MI,
- MachineBasicBlock *BB, bool HasTile) const {
+MachineBasicBlock *AArch64TargetLowering::EmitZAInstr(unsigned Opc,
+ unsigned BaseReg,
+ MachineInstr &MI,
+ MachineBasicBlock *BB,
+ bool HasTile) const {
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
MachineInstrBuilder MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opc));
unsigned StartIdx = 0;
@@ -3230,10 +3279,9 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
// Similarly, (CMP (and X, Y), 0) can be implemented with a TST
// (a.k.a. ANDS) except that the flags are only guaranteed to work for one
// of the signed comparisons.
- const SDValue ANDSNode = DAG.getNode(AArch64ISD::ANDS, dl,
- DAG.getVTList(VT, MVT_CC),
- LHS.getOperand(0),
- LHS.getOperand(1));
+ const SDValue ANDSNode =
+ DAG.getNode(AArch64ISD::ANDS, dl, DAG.getVTList(VT, MVT_CC),
+ LHS.getOperand(0), LHS.getOperand(1));
// Replace all users of (and X, Y) with newly generated (ands X, Y)
DAG.ReplaceAllUsesWith(LHS, ANDSNode);
return ANDSNode.getValue(1);
@@ -3369,11 +3417,11 @@ static bool canEmitConjunction(const SDValue Val, bool &CanNegate,
SDValue O1 = Val->getOperand(1);
bool CanNegateL;
bool MustBeFirstL;
- if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, Depth+1))
+ if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, Depth + 1))
return false;
bool CanNegateR;
bool MustBeFirstR;
- if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, Depth+1))
+ if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, Depth + 1))
return false;
if (MustBeFirstL && MustBeFirstR)
@@ -3410,8 +3458,8 @@ static bool canEmitConjunction(const SDValue Val, bool &CanNegate,
/// \p Negate is true if we want this sub-tree being negated just by changing
/// SETCC conditions.
static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val,
- AArch64CC::CondCode &OutCC, bool Negate, SDValue CCOp,
- AArch64CC::CondCode Predicate) {
+ AArch64CC::CondCode &OutCC, bool Negate,
+ SDValue CCOp, AArch64CC::CondCode Predicate) {
// We're at a tree leaf, produce a conditional comparison operation.
unsigned Opcode = Val->getOpcode();
if (Opcode == ISD::SETCC) {
@@ -3605,8 +3653,7 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
case ISD::SETGT:
if ((VT == MVT::i32 && C != INT32_MAX &&
isLegalArithImmed((uint32_t)(C + 1))) ||
- (VT == MVT::i64 && C != INT64_MAX &&
- isLegalArithImmed(C + 1ULL))) {
+ (VT == MVT::i64 && C != INT64_MAX && isLegalArithImmed(C + 1ULL))) {
CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
RHS = DAG.getConstant(C, dl, VT);
@@ -3676,9 +3723,9 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
SDValue SExt =
DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS,
DAG.getValueType(MVT::i16));
- Cmp = emitComparison(SExt, DAG.getConstant(ValueofRHS, dl,
- RHS.getValueType()),
- CC, dl, DAG);
+ Cmp = emitComparison(
+ SExt, DAG.getConstant(ValueofRHS, dl, RHS.getValueType()), CC, dl,
+ DAG);
AArch64CC = changeIntCCToAArch64CC(CC);
}
}
@@ -3770,10 +3817,9 @@ getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
} else {
SDValue UpperBits = DAG.getNode(ISD::MULHU, DL, MVT::i64, LHS, RHS);
SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
- Overflow =
- DAG.getNode(AArch64ISD::SUBS, DL, VTs,
- DAG.getConstant(0, DL, MVT::i64),
- UpperBits).getValue(1);
+ Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs,
+ DAG.getConstant(0, DL, MVT::i64), UpperBits)
+ .getValue(1);
}
break;
}
@@ -3956,8 +4002,8 @@ static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
// too. This will allow it to be selected to a single instruction:
// CSINC Wd, WZR, WZR, invert(cond).
SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
- Overflow = DAG.getNode(AArch64ISD::CSEL, dl, MVT::i32, FVal, TVal,
- CCVal, Overflow);
+ Overflow =
+ DAG.getNode(AArch64ISD::CSEL, dl, MVT::i32, FVal, TVal, CCVal, Overflow);
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
@@ -3986,10 +4032,10 @@ static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) {
}
// built the mask value encoding the expected behavior.
- unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
- (!IsData << 3) | // IsDataCache bit
- (Locality << 1) | // Cache level bits
- (unsigned)IsStream; // Stream bit
+ unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
+ (!IsData << 3) | // IsDataCache bit
+ (Locality << 1) | // Cache level bits
+ (unsigned)IsStream; // Stream bit
return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Op.getOperand(0),
DAG.getTargetConstant(PrfOp, DL, MVT::i32),
Op.getOperand(1));
@@ -4055,8 +4101,7 @@ SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op,
unsigned NumElts = InVT.getVectorNumElements();
// f16 conversions are promoted to f32 when full fp16 is not supported.
- if (InVT.getVectorElementType() == MVT::f16 &&
- !Subtarget->hasFullFP16()) {
+ if (InVT.getVectorElementType() == MVT::f16 && !Subtarget->hasFullFP16()) {
MVT NewVT = MVT::getVectorVT(MVT::f32, NumElts);
SDLoc dl(Op);
if (IsStrict) {
@@ -4138,9 +4183,8 @@ SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
return DAG.getNode(Op.getOpcode(), dl, {Op.getValueType(), MVT::Other},
{Ext.getValue(1), Ext.getValue(0)});
}
- return DAG.getNode(
- Op.getOpcode(), dl, Op.getValueType(),
- DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, SrcVal));
+ return DAG.getNode(Op.getOpcode(), dl, Op.getValueType(),
+ DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, SrcVal));
}
if (SrcVal.getValueType() != MVT::f128) {
@@ -4268,8 +4312,8 @@ SDValue AArch64TargetLowering::LowerFP_TO_INT_SAT(SDValue Op,
APInt::getSignedMinValue(SatWidth).sext(DstWidth), DL, DstVT);
Sat = DAG.getNode(ISD::SMAX, DL, DstVT, Min, MaxC);
} else {
- SDValue MinC = DAG.getConstant(
- APInt::getAllOnes(SatWidth).zext(DstWidth), DL, DstVT);
+ SDValue MinC =
+ DAG.getConstant(APInt::getAllOnes(SatWidth).zext(DstWidth), DL, DstVT);
Sat = DAG.getNode(ISD::UMIN, DL, DstVT, NativeCvt, MinC);
}
@@ -4314,8 +4358,7 @@ SDValue AArch64TargetLowering::LowerVectorINT_TO_FP(SDValue Op,
MVT::getVectorVT(MVT::getFloatingPointVT(InVT.getScalarSizeInBits()),
InVT.getVectorNumElements());
if (IsStrict) {
- In = DAG.getNode(Opc, dl, {CastVT, MVT::Other},
- {Op.getOperand(0), In});
+ In = DAG.getNode(Opc, dl, {CastVT, MVT::Other}, {Op.getOperand(0), In});
return DAG.getNode(
ISD::STRICT_FP_ROUND, dl, {VT, MVT::Other},
{In.getValue(1), In.getValue(0), DAG.getIntPtrConstant(0, dl)});
@@ -4337,9 +4380,9 @@ SDValue AArch64TargetLowering::LowerVectorINT_TO_FP(SDValue Op,
// Use a scalar operation for conversions between single-element vectors of
// the same size.
if (VT.getVectorNumElements() == 1) {
- SDValue Extract = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, dl, InVT.getScalarType(),
- In, DAG.getConstant(0, dl, MVT::i64));
+ SDValue Extract =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InVT.getScalarType(), In,
+ DAG.getConstant(0, dl, MVT::i64));
EVT ScalarVT = VT.getScalarType();
if (IsStrict)
return DAG.getNode(Op.getOpcode(), dl, {ScalarVT, MVT::Other},
@@ -4351,7 +4394,7 @@ SDValue AArch64TargetLowering::LowerVectorINT_TO_FP(SDValue Op,
}
SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op,
- SelectionDAG &DAG) const {
+ SelectionDAG &DAG) const {
if (Op.getValueType().isVector())
return LowerVectorINT_TO_FP(Op, DAG);
@@ -4368,10 +4411,9 @@ SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op,
ISD::STRICT_FP_ROUND, dl, {MVT::f16, MVT::Other},
{Val.getValue(1), Val.getValue(0), DAG.getIntPtrConstant(0, dl)});
}
- return DAG.getNode(
- ISD::FP_ROUND, dl, MVT::f16,
- DAG.getNode(Op.getOpcode(), dl, MVT::f32, SrcVal),
- DAG.getIntPtrConstant(0, dl));
+ return DAG.getNode(ISD::FP_ROUND, dl, MVT::f16,
+ DAG.getNode(Op.getOpcode(), dl, MVT::f32, SrcVal),
+ DAG.getIntPtrConstant(0, dl));
}
// i128 conversions are libcalls.
@@ -4403,8 +4445,8 @@ SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op,
Entry.IsZExt = false;
Args.push_back(Entry);
- RTLIB::Libcall LC = ArgVT == MVT::f64 ? RTLIB::SINCOS_STRET_F64
- : RTLIB::SINCOS_STRET_F32;
+ RTLIB::Libcall LC =
+ ArgVT == MVT::f64 ? RTLIB::SINCOS_STRET_F64 : RTLIB::SINCOS_STRET_F32;
const char *LibcallName = getLibcallName(LC);
SDValue Callee =
DAG.getExternalSymbol(LibcallName, getPointerTy(DAG.getDataLayout()));
@@ -4472,12 +4514,13 @@ static EVT getExtensionTo64Bits(const EVT &OrigVT) {
MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
switch (OrigSimpleTy) {
- default: llvm_unreachable("Unexpected Vector Type");
+ default:
+ llvm_unreachable("Unexpected Vector Type");
case MVT::v2i8:
case MVT::v2i16:
- return MVT::v2i32;
+ return MVT::v2i32;
case MVT::v4i8:
- return MVT::v4i16;
+ return MVT::v4i16;
}
}
@@ -4587,8 +4630,8 @@ static bool isAddSubSExt(SDValue N, SelectionDAG &DAG) {
if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
SDValue N0 = N.getOperand(0);
SDValue N1 = N.getOperand(1);
- return N0->hasOneUse() && N1->hasOneUse() &&
- isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
+ return N0->hasOneUse() && N1->hasOneUse() && isSignExtended(N0, DAG) &&
+ isSignExtended(N1, DAG);
}
return false;
}
@@ -4598,8 +4641,8 @@ static bool isAddSubZExt(SDValue N, SelectionDAG &DAG) {
if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
SDValue N0 = N.getOperand(0);
SDValue N1 = N.getOperand(1);
- return N0->hasOneUse() && N1->hasOneUse() &&
- isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
+ return N0->hasOneUse() && N1->hasOneUse() && isZeroExtended(N0, DAG) &&
+ isZeroExtended(N1, DAG);
}
return false;
}
@@ -4694,8 +4737,7 @@ static unsigned selectUmullSmull(SDValue &N0, SDValue &N1, SelectionDAG &DAG,
else
ZextOperand = N1.getOperand(0);
if (DAG.SignBitIsZero(ZextOperand)) {
- SDValue NewSext =
- DAG.getSExtOrTrunc(ZextOperand, DL, N0.getValueType());
+ SDValue NewSext = DAG.getSExtOrTrunc(ZextOperand, DL, N0.getValueType());
if (IsN0ZExt)
N0 = NewSext;
else
@@ -4909,8 +4951,8 @@ SDValue AArch64TargetLowering::getRuntimePStateSM(SelectionDAG &DAG,
TargetLowering::CallLoweringInfo CLI(DAG);
ArgListTy Args;
CLI.setDebugLoc(DL).setChain(Chain).setLibCallee(
- CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2,
- RetTy, Callee, std::move(Args));
+ CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2, RetTy,
+ Callee, std::move(Args));
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
SDValue Mask = DAG.getConstant(/*PSTATE.SM*/ 1, DL, MVT::i64);
return DAG.getNode(ISD::AND, DL, MVT::i64, CallResult.first.getOperand(0),
@@ -5076,12 +5118,14 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
}
}
-SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
- SelectionDAG &DAG) const {
+SDValue
+AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
+ SelectionDAG &DAG) const {
unsigned IntNo = Op.getConstantOperandVal(0);
SDLoc dl(Op);
switch (IntNo) {
- default: return SDValue(); // Don't custom lower most intrinsics.
+ default:
+ return SDValue(); // Don't custom lower most intrinsics.
case Intrinsic::thread_pointer: {
EVT PtrVT = getPointerTy(DAG.getDataLayout());
return DAG.getNode(AArch64ISD::THREAD_POINTER, dl, PtrVT);
@@ -5089,8 +5133,8 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::aarch64_neon_abs: {
EVT Ty = Op.getValueType();
if (Ty == MVT::i64) {
- SDValue Result = DAG.getNode(ISD::BITCAST, dl, MVT::v1i64,
- Op.getOperand(1));
+ SDValue Result =
+ DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Op.getOperand(1));
Result = DAG.getNode(ISD::ABS, dl, MVT::v1i64, Result);
return DAG.getNode(ISD::BITCAST, dl, MVT::i64, Result);
} else if (Ty.isVector() && Ty.isInteger() && isTypeLegal(Ty)) {
@@ -5158,17 +5202,17 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(AArch64ISD::PMULL, dl, Op.getValueType(), LHS, RHS);
}
case Intrinsic::aarch64_neon_smax:
- return DAG.getNode(ISD::SMAX, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
+ return DAG.getNode(ISD::SMAX, dl, Op.getValueType(), Op.getOperand(1),
+ Op.getOperand(2));
case Intrinsic::aarch64_neon_umax:
- return DAG.getNode(ISD::UMAX, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
+ return DAG.getNode(ISD::UMAX, dl, Op.getValueType(), Op.getOperand(1),
+ Op.getOperand(2));
case Intrinsic::aarch64_neon_smin:
- return DAG.getNode(ISD::SMIN, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
+ return DAG.getNode(ISD::SMIN, dl, Op.getValueType(), Op.getOperand(1),
+ Op.getOperand(2));
case Intrinsic::aarch64_neon_umin:
- return DAG.getNode(ISD::UMIN, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
+ return DAG.getNode(ISD::UMIN, dl, Op.getValueType(), Op.getOperand(1),
+ Op.getOperand(2));
case Intrinsic::aarch64_neon_scalar_sqxtn:
case Intrinsic::aarch64_neon_scalar_sqxtun:
case Intrinsic::aarch64_neon_scalar_uqxtn: {
@@ -5233,8 +5277,8 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(ISD::VECTOR_REVERSE, dl, Op.getValueType(),
Op.getOperand(1));
case Intrinsic::aarch64_sve_tbl:
- return DAG.getNode(AArch64ISD::TBL, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
+ return DAG.getNode(AArch64ISD::TBL, dl, Op.getValueType(), Op.getOperand(1),
+ Op.getOperand(2));
case Intrinsic::aarch64_sve_trn1:
return DAG.getNode(AArch64ISD::TRN1, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
@@ -5309,8 +5353,9 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(AArch64ISD::FFLOOR_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_frinti:
- return DAG.getNode(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU, dl, Op.getValueType(),
- Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
+ return DAG.getNode(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU, dl,
+ Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
+ Op.getOperand(1));
case Intrinsic::aarch64_sve_frintx:
return DAG.getNode(AArch64ISD::FRINT_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
@@ -5318,8 +5363,9 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(AArch64ISD::FROUND_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_frintn:
- return DAG.getNode(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU, dl, Op.getValueType(),
- Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
+ return DAG.getNode(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU, dl,
+ Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
+ Op.getOperand(1));
case Intrinsic::aarch64_sve_frintz:
return DAG.getNode(AArch64ISD::FTRUNC_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
@@ -5332,13 +5378,11 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
Op.getOperand(1));
case Intrinsic::aarch64_sve_fcvtzu:
- return DAG.getNode(AArch64ISD::FCVTZU_MERGE_PASSTHRU, dl,
- Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
- Op.getOperand(1));
+ return DAG.getNode(AArch64ISD::FCVTZU_MERGE_PASSTHRU, dl, Op.getValueType(),
+ Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_fcvtzs:
- return DAG.getNode(AArch64ISD::FCVTZS_MERGE_PASSTHRU, dl,
- Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
- Op.getOperand(1));
+ return DAG.getNode(AArch64ISD::FCVTZS_MERGE_PASSTHRU, dl, Op.getValueType(),
+ Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_fsqrt:
return DAG.getNode(AArch64ISD::FSQRT_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
@@ -5836,9 +5880,8 @@ SDValue AArch64TargetLowering::LowerMLOAD(SDValue Op, SelectionDAG &DAG) const {
}
// Custom lower trunc store for v4i8 vectors, since it is promoted to v4i16.
-static SDValue LowerTruncateVectorStore(SDLoc DL, StoreSDNode *ST,
- EVT VT, EVT MemVT,
- SelectionDAG &DAG) {
+static SDValue LowerTruncateVectorStore(SDLoc DL, StoreSDNode *ST, EVT VT,
+ EVT MemVT, SelectionDAG &DAG) {
assert(VT.isVector() && "VT should be a vector type");
assert(MemVT == MVT::v4i8 && VT == MVT::v4i16);
@@ -5852,29 +5895,28 @@ static SDValue LowerTruncateVectorStore(SDLoc DL, StoreSDNode *ST,
// str s0, [x0]
SDValue Undef = DAG.getUNDEF(MVT::i16);
- SDValue UndefVec = DAG.getBuildVector(MVT::v4i16, DL,
- {Undef, Undef, Undef, Undef});
+ SDValue UndefVec =
+ DAG.getBuildVector(MVT::v4i16, DL, {Undef, Undef, Undef, Undef});
- SDValue TruncExt = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i16,
- Value, UndefVec);
+ SDValue TruncExt =
+ DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i16, Value, UndefVec);
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::v8i8, TruncExt);
Trunc = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32, Trunc);
SDValue ExtractTrunc = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
Trunc, DAG.getConstant(0, DL, MVT::i64));
- return DAG.getStore(ST->getChain(), DL, ExtractTrunc,
- ST->getBasePtr(), ST->getMemOperand());
+ return DAG.getStore(ST->getChain(), DL, ExtractTrunc, ST->getBasePtr(),
+ ST->getMemOperand());
}
// Custom lowering for any store, vector or scalar and/or default or with
// a truncate operations. Currently only custom lower truncate operation
// from vector v4i16 to v4i8 or volatile stores of i128.
-SDValue AArch64TargetLowering::LowerSTORE(SDValue Op,
- SelectionDAG &DAG) const {
+SDValue AArch64TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
SDLoc Dl(Op);
StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
- assert (StoreNode && "Can only custom lower store nodes");
+ assert(StoreNode && "Can only custom lower store nodes");
SDValue Value = StoreNode->getValue();
@@ -5934,8 +5976,8 @@ SDValue AArch64TargetLowering::LowerSTORE(SDValue Op,
SDValue Base = StoreNode->getBasePtr();
EVT PtrVT = Base.getValueType();
for (unsigned i = 0; i < 8; i++) {
- SDValue Part = DAG.getNode(AArch64ISD::LS64_EXTRACT, Dl, MVT::i64,
- Value, DAG.getConstant(i, Dl, MVT::i32));
+ SDValue Part = DAG.getNode(AArch64ISD::LS64_EXTRACT, Dl, MVT::i64, Value,
+ DAG.getConstant(i, Dl, MVT::i32));
SDValue Ptr = DAG.getNode(ISD::ADD, Dl, PtrVT, Base,
DAG.getConstant(i * 8, Dl, PtrVT));
Chain = DAG.getStore(Chain, Dl, Part, Ptr, StoreNode->getPointerInfo(),
@@ -5979,8 +6021,7 @@ SDValue AArch64TargetLowering::LowerStore128(SDValue Op,
return Result;
}
-SDValue AArch64TargetLowering::LowerLOAD(SDValue Op,
- SelectionDAG &DAG) const {
+SDValue AArch64TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
assert(LoadNode && "Expected custom lowering of a load node");
@@ -5993,9 +6034,9 @@ SDValue AArch64TargetLowering::LowerLOAD(SDValue Op,
for (unsigned i = 0; i < 8; i++) {
SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Base,
DAG.getConstant(i * 8, DL, PtrVT));
- SDValue Part = DAG.getLoad(MVT::i64, DL, Chain, Ptr,
- LoadNode->getPointerInfo(),
- LoadNode->getOriginalAlign());
+ SDValue Part =
+ DAG.getLoad(MVT::i64, DL, Chain, Ptr, LoadNode->getPointerInfo(),
+ LoadNode->getOriginalAlign());
Ops.push_back(Part);
Chain = SDValue(Part.getNode(), 1);
}
@@ -6043,9 +6084,8 @@ SDValue AArch64TargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
Op.getOperand(0));
// Generate SUBS & CSEL.
- SDValue Cmp =
- DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, MVT::i32),
- Op.getOperand(0), DAG.getConstant(0, DL, VT));
+ SDValue Cmp = DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, MVT::i32),
+ Op.getOperand(0), DAG.getConstant(0, DL, VT));
return DAG.getNode(AArch64ISD::CSEL, DL, VT, Op.getOperand(0), Neg,
DAG.getConstant(AArch64CC::PL, DL, MVT::i32),
Cmp.getValue(1));
@@ -6611,7 +6651,6 @@ AArch64TargetLowering::CCAssignFnForReturn(CallingConv::ID CC) const {
}
}
-
unsigned
AArch64TargetLowering::allocateLazySaveBuffer(SDValue &Chain, const SDLoc &DL,
SelectionDAG &DAG) const {
@@ -6667,7 +6706,8 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
SmallVector<ISD::OutputArg, 4> Outs;
GetReturnInfo(CallConv, F.getReturnType(), F.getAttributes(), Outs,
DAG.getTargetLoweringInfo(), MF.getDataLayout());
- if (any_of(Outs, [](ISD::OutputArg &Out){ return Out.VT.isScalableVector(); }))
+ if (any_of(Outs,
+ [](ISD::OutputArg &Out) { return Out.VT.isScalableVector(); }))
FuncInfo->setIsSVECC(true);
// Assign locations to all of the incoming arguments.
@@ -6728,10 +6768,10 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
int Size = Ins[i].Flags.getByValSize();
unsigned NumRegs = (Size + 7) / 8;
- // FIXME: This works on big-endian for composite byvals, which are the common
- // case. It should also work for fundamental types too.
+ // FIXME: This works on big-endian for composite byvals, which are the
+ // common case. It should also work for fundamental types too.
unsigned FrameIdx =
- MFI.CreateFixedObject(8 * NumRegs, VA.getLocMemOffset(), false);
+ MFI.CreateFixedObject(8 * NumRegs, VA.getLocMemOffset(), false);
SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrVT);
InVals.push_back(FrameIdxN);
@@ -6822,7 +6862,8 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
unsigned ArgOffset = VA.getLocMemOffset();
unsigned ArgSize = (VA.getLocInfo() == CCValAssign::Indirect
? VA.getLocVT().getSizeInBits()
- : VA.getValVT().getSizeInBits()) / 8;
+ : VA.getValVT().getSizeInBits()) /
+ 8;
uint32_t BEAlign = 0;
if (!Subtarget->isLittleEndian() && ArgSize < 8 &&
@@ -6885,8 +6926,7 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
}
if (VA.getLocInfo() == CCValAssign::Indirect) {
- assert((VA.getValVT().isScalableVT() ||
- Subtarget->isWindowsArm64EC()) &&
+ assert((VA.getValVT().isScalableVT() || Subtarget->isWindowsArm64EC()) &&
"Indirect arguments should be scalable on most subtargets");
uint64_t PartSize = VA.getValVT().getStoreSize().getKnownMinValue();
@@ -6965,12 +7005,11 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
// Ensure that the SMSTART happens after the CopyWithChain such that its
// chain result is used.
- for (unsigned I=0; I<InVals.size(); ++I) {
+ for (unsigned I = 0; I < InVals.size(); ++I) {
Register Reg = MF.getRegInfo().createVirtualRegister(
getRegClassFor(InVals[I].getValueType().getSimpleVT()));
Chain = DAG.getCopyToReg(Chain, DL, Reg, InVals[I]);
- InVals[I] = DAG.getCopyFromReg(Chain, DL, Reg,
- InVals[I].getValueType());
+ InVals[I] = DAG.getCopyFromReg(Chain, DL, Reg, InVals[I].getValueType());
}
}
@@ -6980,8 +7019,8 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
// The AAPCS variadic function ABI is identical to the non-variadic
// one. As a result there may be more arguments in registers and we should
// save them for future reference.
- // Win64 variadic functions also pass arguments in registers, but all float
- // arguments are passed in integer registers.
+ // Win64 variadic functions also pass arguments in registers, but all
+ // float arguments are passed in integer registers.
saveVarArgRegisters(CCInfo, DAG, DL, Chain);
}
@@ -6999,7 +7038,7 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
RegParmTypes.push_back(MVT::f128);
// Compute the set of forwarded registers. The rest are scratch.
SmallVectorImpl<ForwardedRegister> &Forwards =
- FuncInfo->getForwardedMustTailRegParms();
+ FuncInfo->getForwardedMustTailRegParms();
CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes,
CC_AArch64_AAPCS);
@@ -7073,7 +7112,8 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
MachineFrameInfo &MFI = MF.getFrameInfo();
AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
auto PtrVT = getPointerTy(DAG.getDataLayout());
- bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv());
+ bool IsWin64 =
+ Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv());
SmallVector<SDValue, 8> MemOps;
@@ -7093,7 +7133,8 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
GPRIdx = MFI.CreateFixedObject(GPRSaveSize, -(int)GPRSaveSize, false);
if (GPRSaveSize & 15)
// The extra size here, if triggered, will always be 8.
- MFI.CreateFixedObject(16 - (GPRSaveSize & 15), -(int)alignTo(GPRSaveSize, 16), false);
+ MFI.CreateFixedObject(16 - (GPRSaveSize & 15),
+ -(int)alignTo(GPRSaveSize, 16), false);
} else
GPRIdx = MFI.CreateStackObject(GPRSaveSize, Align(8), false);
@@ -7281,9 +7322,9 @@ static void analyzeCallOperands(const AArch64TargetLowering &TLI,
if (!UseVarArgCC) {
// Get type of the original argument.
- EVT ActualVT =
- TLI.getValueType(DAG.getDataLayout(), CLI.Args[Outs[i].OrigArgIndex].Ty,
- /*AllowUnknown*/ true);
+ EVT ActualVT = TLI.getValueType(DAG.getDataLayout(),
+ CLI.Args[Outs[i].OrigArgIndex].Ty,
+ /*AllowUnknown*/ true);
MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : ArgVT;
// If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
@@ -7360,7 +7401,8 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
return false;
}
- if (canGuaranteeTCO(CalleeCC, getTargetMachine().Options.GuaranteedTailCallOpt))
+ if (canGuaranteeTCO(CalleeCC,
+ getTargetMachine().Options.GuaranteedTailCallOpt))
return CCMatch;
// Externally-defined functions with weak linkage should not be
@@ -7416,10 +7458,11 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
analyzeCallOperands(*this, Subtarget, CLI, CCInfo);
if (IsVarArg && !(CLI.CB && CLI.CB->isMustTailCall())) {
- // When we are musttail, additional checks have been done and we can safely ignore this check
- // At least two cases here: if caller is fastcc then we can't have any
- // memory arguments (we'd be expected to clean up the stack afterwards). If
- // caller is C then we could potentially use its argument area.
+ // When we are musttail, additional checks have been done and we can safely
+ // ignore this check At least two cases here: if caller is fastcc then we
+ // can't have any memory arguments (we'd be expected to clean up the stack
+ // afterwards). If caller is C then we could potentially use its argument
+ // area.
// FIXME: for now we take the most conservative of these in both cases:
// disallow all variadic memory operands.
@@ -7520,9 +7563,11 @@ void AArch64TargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
MI.removeOperand(I);
}
-SDValue AArch64TargetLowering::changeStreamingMode(
- SelectionDAG &DAG, SDLoc DL, bool Enable,
- SDValue Chain, SDValue InGlue, SDValue PStateSM, bool Entry) const {
+SDValue AArch64TargetLowering::changeStreamingMode(SelectionDAG &DAG, SDLoc DL,
+ bool Enable, SDValue Chain,
+ SDValue InGlue,
+ SDValue PStateSM,
+ bool Entry) const {
MachineFunction &MF = DAG.getMachineFunction();
AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
FuncInfo->setHasStreamingModeChanges(true);
@@ -7693,7 +7738,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
if (RequiresLazySave) {
unsigned TPIDR2Obj = FuncInfo->getLazySaveTPIDR2Obj();
MachinePointerInfo MPI = MachinePointerInfo::getStack(MF, TPIDR2Obj);
- SDValue TPIDR2ObjAddr = DAG.getFrameIndex(TPIDR2Obj,
+ SDValue TPIDR2ObjAddr = DAG.getFrameIndex(
+ TPIDR2Obj,
DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout()));
SDValue NumZaSaveSlicesAddr =
DAG.getNode(ISD::ADD, DL, TPIDR2ObjAddr.getValueType(), TPIDR2ObjAddr,
@@ -7708,10 +7754,10 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
TPIDR2ObjAddr);
OptimizationRemarkEmitter ORE(&MF.getFunction());
ORE.emit([&]() {
- auto R = CLI.CB ? OptimizationRemarkAnalysis("sme", "SMELazySaveZA",
- CLI.CB)
- : OptimizationRemarkAnalysis("sme", "SMELazySaveZA",
- &MF.getFunction());
+ auto R = CLI.CB
+ ? OptimizationRemarkAnalysis("sme", "SMELazySaveZA", CLI.CB)
+ : OptimizationRemarkAnalysis("sme", "SMELazySaveZA",
+ &MF.getFunction());
return DescribeCallsite(R) << " sets up a lazy save for ZA";
});
}
@@ -7727,10 +7773,10 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
PStateSM = getRuntimePStateSM(DAG, Chain, DL, MVT::i64);
OptimizationRemarkEmitter ORE(&MF.getFunction());
ORE.emit([&]() {
- auto R = CLI.CB ? OptimizationRemarkAnalysis("sme", "SMETransition",
- CLI.CB)
- : OptimizationRemarkAnalysis("sme", "SMETransition",
- &MF.getFunction());
+ auto R = CLI.CB
+ ? OptimizationRemarkAnalysis("sme", "SMETransition", CLI.CB)
+ : OptimizationRemarkAnalysis("sme", "SMETransition",
+ &MF.getFunction());
DescribeCallsite(R) << " requires a streaming mode transition";
return R;
});
@@ -7781,7 +7827,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
const auto &Forwards = FuncInfo->getForwardedMustTailRegParms();
for (const auto &F : Forwards) {
SDValue Val = DAG.getCopyFromReg(Chain, DL, F.VReg, F.VT);
- RegsToPass.emplace_back(F.PReg, Val);
+ RegsToPass.emplace_back(F.PReg, Val);
}
}
@@ -8043,8 +8089,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
// Build a sequence of copy-to-reg nodes chained together with token chain
// and flag operands which copy the outgoing args into the appropriate regs.
for (auto &RegToPass : RegsToPass) {
- Chain = DAG.getCopyToReg(Chain, DL, RegToPass.first,
- RegToPass.second, InGlue);
+ Chain =
+ DAG.getCopyToReg(Chain, DL, RegToPass.first, RegToPass.second, InGlue);
InGlue = Chain.getValue(1);
}
@@ -8097,8 +8143,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
// Add argument registers to the end of the list so that they are known live
// into the call.
for (auto &RegToPass : RegsToPass)
- Ops.push_back(DAG.getRegister(RegToPass.first,
- RegToPass.second.getValueType()));
+ Ops.push_back(
+ DAG.getRegister(RegToPass.first, RegToPass.second.getValueType()));
// Add a register mask operand representing the call-preserved registers.
const uint32_t *Mask;
@@ -8243,8 +8289,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
Register Reg = MF.getRegInfo().createVirtualRegister(
getRegClassFor(InVals[I].getValueType().getSimpleVT()));
SDValue X = DAG.getCopyToReg(Result, DL, Reg, InVals[I]);
- InVals[I] = DAG.getCopyFromReg(X, DL, Reg,
- InVals[I].getValueType());
+ InVals[I] = DAG.getCopyFromReg(X, DL, Reg, InVals[I].getValueType());
}
}
@@ -8365,7 +8410,7 @@ AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
Glue = Chain.getValue(1);
RetOps.push_back(
- DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
+ DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
}
const MCPhysReg *I = TRI->getCalleeSavedRegsViaCopy(&MF);
@@ -8428,7 +8473,7 @@ SDValue AArch64TargetLowering::getTargetNode(ConstantPoolSDNode *N, EVT Ty,
N->getOffset(), Flag);
}
-SDValue AArch64TargetLowering::getTargetNode(BlockAddressSDNode* N, EVT Ty,
+SDValue AArch64TargetLowering::getTargetNode(BlockAddressSDNode *N, EVT Ty,
SelectionDAG &DAG,
unsigned Flag) const {
return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, 0, Flag);
@@ -8643,8 +8688,7 @@ SDValue AArch64TargetLowering::LowerELFTLSLocalExec(const GlobalValue *GV,
HiVar,
DAG.getTargetConstant(0, DL, MVT::i32)),
0);
- return SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, Addr,
- LoVar,
+ return SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, Addr, LoVar,
DAG.getTargetConstant(0, DL, MVT::i32)),
0);
}
@@ -8851,8 +8895,8 @@ AArch64TargetLowering::LowerWindowsGlobalTLSAddress(SDValue Op,
// The pointer to the thread's TLS data area is at the TLS Index scaled by 8
// offset into the TLSArray.
TLSIndex = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TLSIndex);
- SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
- DAG.getConstant(3, DL, PtrVT));
+ SDValue Slot =
+ DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex, DAG.getConstant(3, DL, PtrVT));
SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
MachinePointerInfo());
@@ -9562,8 +9606,8 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
// (SELECT_CC setgt, lhs, 0, lhs, 0) -> (BIC lhs, (SRA lhs, typesize-1))
// (SELECT_CC setlt, lhs, 0, lhs, 0) -> (AND lhs, (SRA lhs, typesize-1))
// Both require less instructions than compare and conditional select.
- if ((CC == ISD::SETGT || CC == ISD::SETLT) && LHS == TVal &&
- RHSC && RHSC->isZero() && CFVal && CFVal->isZero() &&
+ if ((CC == ISD::SETGT || CC == ISD::SETLT) && LHS == TVal && RHSC &&
+ RHSC->isZero() && CFVal && CFVal->isZero() &&
LHS.getValueType() == RHS.getValueType()) {
EVT VT = LHS.getValueType();
SDValue Shift =
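As a side note on the transform the comment above describes: the sign bit of a signed value, broadcast with an arithmetic shift right, acts as the select mask. A minimal standalone sketch in plain C++ (scalar int32_t instead of SDNodes; assumes arithmetic right shift for signed values, which C++20 guarantees and AArch64 provides):

  #include <cassert>
  #include <cstdint>

  // max(x, 0): clear x when its sign bit is set   -> BIC x, (SRA x, 31)
  int32_t selGtZero(int32_t x) { return x & ~(x >> 31); }
  // min(x, 0): keep x only when its sign bit is set -> AND x, (SRA x, 31)
  int32_t selLtZero(int32_t x) { return x & (x >> 31); }

  int main() {
    for (int32_t x : {-5, -1, 0, 1, 42}) {
      assert(selGtZero(x) == (x > 0 ? x : 0));
      assert(selLtZero(x) == (x < 0 ? x : 0));
    }
  }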
@@ -9677,7 +9721,7 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
else if (CFVal && CFVal == RHSVal && AArch64CC == AArch64CC::NE)
FVal = LHS;
} else if (Opcode == AArch64ISD::CSNEG && RHSVal && RHSVal->isOne()) {
- assert (CTVal && CFVal && "Expected constant operands for CSNEG.");
+ assert(CTVal && CFVal && "Expected constant operands for CSNEG.");
// Use a CSINV to transform "a == C ? 1 : -1" to "a == C ? a : -1" to
// avoid materializing C.
AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
@@ -9801,8 +9845,7 @@ SDValue AArch64TargetLowering::LowerSELECT(SDValue Op,
if (Ty == MVT::aarch64svcount) {
TVal = DAG.getNode(ISD::BITCAST, DL, MVT::nxv16i1, TVal);
FVal = DAG.getNode(ISD::BITCAST, DL, MVT::nxv16i1, FVal);
- SDValue Sel =
- DAG.getNode(ISD::SELECT, DL, MVT::nxv16i1, CCVal, TVal, FVal);
+ SDValue Sel = DAG.getNode(ISD::SELECT, DL, MVT::nxv16i1, CCVal, TVal, FVal);
return DAG.getNode(ISD::BITCAST, DL, Ty, Sel);
}
@@ -9885,8 +9928,7 @@ SDValue AArch64TargetLowering::LowerJumpTable(SDValue Op,
return getAddr(JT, DAG);
}
-SDValue AArch64TargetLowering::LowerBR_JT(SDValue Op,
- SelectionDAG &DAG) const {
+SDValue AArch64TargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
// Jump table entries as PC relative offsets. No additional tweaking
// is necessary here. Just get the address of the jump table.
SDLoc DL(Op);
@@ -9922,7 +9964,7 @@ SDValue AArch64TargetLowering::LowerConstantPool(SDValue Op,
}
SDValue AArch64TargetLowering::LowerBlockAddress(SDValue Op,
- SelectionDAG &DAG) const {
+ SelectionDAG &DAG) const {
BlockAddressSDNode *BA = cast<BlockAddressSDNode>(Op);
CodeModel::Model CM = getTargetMachine().getCodeModel();
if (CM == CodeModel::Large && !Subtarget->isTargetMachO()) {
@@ -9935,7 +9977,7 @@ SDValue AArch64TargetLowering::LowerBlockAddress(SDValue Op,
}
SDValue AArch64TargetLowering::LowerDarwin_VASTART(SDValue Op,
- SelectionDAG &DAG) const {
+ SelectionDAG &DAG) const {
AArch64FunctionInfo *FuncInfo =
DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
@@ -10077,9 +10119,9 @@ SDValue AArch64TargetLowering::LowerVACOPY(SDValue Op,
SDLoc DL(Op);
unsigned PtrSize = Subtarget->isTargetILP32() ? 4 : 8;
unsigned VaListSize =
- (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
- ? PtrSize
- : Subtarget->isTargetILP32() ? 20 : 32;
+ (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows()) ? PtrSize
+ : Subtarget->isTargetILP32() ? 20
+ : 32;
const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
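For context on the 20/32-byte constants: they are the size of the ILP32 and LP64 AAPCS64 va_list structure respectively, while Darwin and Windows use a single pointer. A rough sketch of the LP64 layout, with field names following the ABI document (illustrative only, not the type the backend manipulates):

  #include <cstdint>

  // AAPCS64 variadic state: a stacked-argument pointer, the tops of the
  // general-register and FP/SIMD-register save areas, and two offsets.
  struct aapcs64_va_list {
    void *stack;     // next stacked argument
    void *gr_top;    // end of the GP save area
    void *vr_top;    // end of the FP/SIMD save area
    int32_t gr_offs; // negative offset from gr_top
    int32_t vr_offs; // negative offset from vr_top
  };
  static_assert(sizeof(aapcs64_va_list) == 32, "LP64: 8+8+8+4+4");

  int main() {}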
@@ -10151,7 +10193,7 @@ SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
SDValue NarrowFP =
DAG.getNode(ISD::FP_ROUND, DL, VT, WideFP.getValue(0),
DAG.getIntPtrConstant(1, DL, /*isTarget=*/true));
- SDValue Ops[] = { NarrowFP, WideFP.getValue(1) };
+ SDValue Ops[] = {NarrowFP, WideFP.getValue(1)};
// Merge the rounded value with the chain output of the load.
return DAG.getMergeValues(Ops, DL);
}
@@ -10195,8 +10237,9 @@ SDValue AArch64TargetLowering::LowerSPONENTRY(SDValue Op,
// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
-Register AArch64TargetLowering::
-getRegisterByName(const char* RegName, LLT VT, const MachineFunction &MF) const {
+Register
+AArch64TargetLowering::getRegisterByName(const char *RegName, LLT VT,
+ const MachineFunction &MF) const {
Register Reg = MatchRegisterName(RegName);
if (AArch64::X1 <= Reg && Reg <= AArch64::X28) {
const AArch64RegisterInfo *MRI = Subtarget->getRegisterInfo();
@@ -10207,8 +10250,8 @@ getRegisterByName(const char* RegName, LLT VT, const MachineFunction &MF) const
}
if (Reg)
return Reg;
- report_fatal_error(Twine("Invalid register name \""
- + StringRef(RegName) + "\"."));
+ report_fatal_error(
+ Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
}
SDValue AArch64TargetLowering::LowerADDROFRETURNADDR(SDValue Op,
@@ -10314,7 +10357,8 @@ bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
}
LLVM_DEBUG(dbgs() << (IsLegal ? "Legal " : "Illegal ") << VT
- << " imm value: "; Imm.dump(););
+ << " imm value: ";
+ Imm.dump(););
return IsLegal;
}
@@ -10380,8 +10424,8 @@ SDValue AArch64TargetLowering::getSqrtEstimate(SDValue Operand,
// Newton reciprocal square root iteration: E * 0.5 * (3 - X * E^2)
// AArch64 reciprocal square root iteration instruction: 0.5 * (3 - M * N)
for (int i = ExtraSteps; i > 0; --i) {
- SDValue Step = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Estimate,
- Flags);
+ SDValue Step =
+ DAG.getNode(ISD::FMUL, DL, VT, Estimate, Estimate, Flags);
Step = DAG.getNode(AArch64ISD::FRSQRTS, DL, VT, Operand, Step, Flags);
Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, Flags);
}
@@ -10410,8 +10454,8 @@ SDValue AArch64TargetLowering::getRecipEstimate(SDValue Operand,
// Newton reciprocal iteration: E * (2 - X * E)
// AArch64 reciprocal iteration instruction: (2 - M * N)
for (int i = ExtraSteps; i > 0; --i) {
- SDValue Step = DAG.getNode(AArch64ISD::FRECPS, DL, VT, Operand,
- Estimate, Flags);
+ SDValue Step =
+ DAG.getNode(AArch64ISD::FRECPS, DL, VT, Operand, Estimate, Flags);
Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, Flags);
}
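Both of these loops are the Newton-Raphson refinements spelled out in the comments. A scalar sketch of the same recurrences (plain float arithmetic rather than the FRSQRTS/FRECPS node forms):

  #include <cmath>
  #include <cstdio>

  // One step towards 1/sqrt(X):  E' = E * 0.5 * (3 - X * E^2)
  float rsqrtStep(float X, float E) { return E * 0.5f * (3.0f - X * E * E); }
  // One step towards 1/X:        E' = E * (2 - X * E)
  float recipStep(float X, float E) { return E * (2.0f - X * E); }

  int main() {
    float X = 2.0f, rs = 0.7f, rc = 0.4f; // rough initial estimates
    for (int i = 0; i < 3; ++i) {
      rs = rsqrtStep(X, rs);
      rc = recipStep(X, rc);
    }
    std::printf("1/sqrt(2) ~ %f (libm: %f)\n", rs, 1.0f / std::sqrt(X));
    std::printf("1/2       ~ %f\n", rc);
  }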
@@ -10463,9 +10507,8 @@ const char *AArch64TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
if (ConstraintVT.isFloatingPoint())
return "w";
- if (ConstraintVT.isVector() &&
- (ConstraintVT.getSizeInBits() == 64 ||
- ConstraintVT.getSizeInBits() == 128))
+ if (ConstraintVT.isVector() && (ConstraintVT.getSizeInBits() == 64 ||
+ ConstraintVT.getSizeInBits() == 128))
return "w";
return "r";
@@ -11069,10 +11112,10 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
int WindowScale;
ShuffleSourceInfo(SDValue Vec)
- : Vec(Vec), MinElt(std::numeric_limits<unsigned>::max()), MaxElt(0),
+ : Vec(Vec), MinElt(std::numeric_limits<unsigned>::max()), MaxElt(0),
ShuffleVec(Vec), WindowBase(0), WindowScale(1) {}
- bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
+ bool operator==(SDValue OtherVec) { return Vec == OtherVec; }
};
// First gather all vectors used as an immediate source for this BUILD_VECTOR
@@ -11158,8 +11201,8 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
TBLMask.push_back(DAG.getConstant(Mask[i], dl, MVT::i32));
assert((Mask.size() == 8 || Mask.size() == 16) &&
"Expected a v8i8 or v16i8 Mask");
- TBLOperands.push_back(
- DAG.getBuildVector(Mask.size() == 8 ? MVT::v8i8 : MVT::v16i8, dl, TBLMask));
+ TBLOperands.push_back(DAG.getBuildVector(
+ Mask.size() == 8 ? MVT::v8i8 : MVT::v16i8, dl, TBLMask));
SDValue Shuffle =
DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl,
@@ -11250,14 +11293,15 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
if (!SrcVT.is64BitVector()) {
LLVM_DEBUG(
- dbgs() << "Reshuffle failed: don't know how to lower AArch64ISD::EXT "
- "for SVE vectors.");
+ dbgs()
+ << "Reshuffle failed: don't know how to lower AArch64ISD::EXT "
+ "for SVE vectors.");
return SDValue();
}
- Src.ShuffleVec = DAG.getNode(AArch64ISD::EXT, dl, DestVT, VEXTSrc1,
- VEXTSrc2,
- DAG.getConstant(Imm, dl, MVT::i32));
+ Src.ShuffleVec =
+ DAG.getNode(AArch64ISD::EXT, dl, DestVT, VEXTSrc1, VEXTSrc2,
+ DAG.getConstant(Imm, dl, MVT::i32));
Src.WindowBase = -Src.MinElt;
}
}
@@ -11301,8 +11345,8 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
// trunc. So only std::min(SrcBits, DestBits) actually get defined in this
// segment.
EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
- int BitsDefined = std::min(OrigEltTy.getScalarSizeInBits(),
- VT.getScalarSizeInBits());
+ int BitsDefined =
+ std::min(OrigEltTy.getScalarSizeInBits(), VT.getScalarSizeInBits());
int LanesDefined = BitsDefined / BitsPerShuffleLane;
// This source is expected to fill ResMultiplier lanes of the final shuffle,
@@ -11321,12 +11365,12 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
return SDValue();
}
- SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) };
+ SDValue ShuffleOps[] = {DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT)};
for (unsigned i = 0; i < Sources.size(); ++i)
ShuffleOps[i] = Sources[i].ShuffleVec;
- SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0],
- ShuffleOps[1], Mask);
+ SDValue Shuffle =
+ DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0], ShuffleOps[1], Mask);
SDValue V;
if (DAG.getDataLayout().isBigEndian()) {
V = DAG.getNode(AArch64ISD::NVCAST, dl, VT, Shuffle);
@@ -11374,7 +11418,8 @@ static bool isSingletonEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
// Detect patterns of a0,a1,a2,a3,b0,b1,b2,b3,c0,c1,c2,c3,d0,d1,d2,d3 from
// v4i32s. This is really a truncate, which we can construct out of (legal)
// concats and truncate nodes.
-static SDValue ReconstructTruncateFromBuildVector(SDValue V, SelectionDAG &DAG) {
+static SDValue ReconstructTruncateFromBuildVector(SDValue V,
+ SelectionDAG &DAG) {
if (V.getValueType() != MVT::v16i8)
return SDValue();
assert(V.getNumOperands() == 16 && "Expected 16 operands on the BUILDVECTOR");
@@ -11661,8 +11706,8 @@ static bool isTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
return true;
}
-static bool isINSMask(ArrayRef<int> M, int NumInputElements,
- bool &DstIsLeft, int &Anomaly) {
+static bool isINSMask(ArrayRef<int> M, int NumInputElements, bool &DstIsLeft,
+ int &Anomaly) {
if (M.size() != static_cast<size_t>(NumInputElements))
return false;
@@ -11750,11 +11795,11 @@ static SDValue tryFormConcatFromShuffle(SDValue Op, SelectionDAG &DAG) {
/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
/// the specified operations to build the shuffle. ID is the perfect-shuffle
-//ID, V1 and V2 are the original shuffle inputs. PFEntry is the Perfect shuffle
-//table entry and LHS/RHS are the immediate inputs for this stage of the
-//shuffle.
-static SDValue GeneratePerfectShuffle(unsigned ID, SDValue V1,
- SDValue V2, unsigned PFEntry, SDValue LHS,
+// ID, V1 and V2 are the original shuffle inputs. PFEntry is the Perfect shuffle
+// table entry and LHS/RHS are the immediate inputs for this stage of the
+// shuffle.
+static SDValue GeneratePerfectShuffle(unsigned ID, SDValue V1, SDValue V2,
+ unsigned PFEntry, SDValue LHS,
SDValue RHS, SelectionDAG &DAG,
const SDLoc &dl) {
unsigned OpNum = (PFEntry >> 26) & 0x0F;
@@ -12256,9 +12301,9 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
for (unsigned LaneSize : {64U, 32U, 16U}) {
unsigned Lane = 0;
if (isWideDUPMask(ShuffleMask, VT, LaneSize, Lane)) {
- unsigned Opcode = LaneSize == 64 ? AArch64ISD::DUPLANE64
- : LaneSize == 32 ? AArch64ISD::DUPLANE32
- : AArch64ISD::DUPLANE16;
+ unsigned Opcode = LaneSize == 64 ? AArch64ISD::DUPLANE64
+ : LaneSize == 32 ? AArch64ISD::DUPLANE32
+ : AArch64ISD::DUPLANE16;
// Cast V1 to an integer vector with required lane size
MVT NewEltTy = MVT::getIntegerVT(LaneSize);
unsigned NewEltCount = VT.getSizeInBits() / LaneSize;
@@ -12458,7 +12503,6 @@ SDValue AArch64TargetLowering::LowerDUPQLane(SDValue Op,
return DAG.getNode(ISD::BITCAST, DL, VT, TBL);
}
-
static bool resolveBuildVector(BuildVectorSDNode *BVN, APInt &CnstBits,
APInt &UndefBits) {
EVT VT = BVN->getValueType(0);
@@ -12483,7 +12527,7 @@ static bool resolveBuildVector(BuildVectorSDNode *BVN, APInt &CnstBits,
// Try 64-bit splatted SIMD immediate.
static SDValue tryAdvSIMDModImm64(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
- const APInt &Bits) {
+ const APInt &Bits) {
if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
EVT VT = Op.getValueType();
@@ -12493,8 +12537,8 @@ static SDValue tryAdvSIMDModImm64(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
Value = AArch64_AM::encodeAdvSIMDModImmType10(Value);
SDLoc dl(Op);
- SDValue Mov = DAG.getNode(NewOp, dl, MovTy,
- DAG.getConstant(Value, dl, MVT::i32));
+ SDValue Mov =
+ DAG.getNode(NewOp, dl, MovTy, DAG.getConstant(Value, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
}
@@ -12520,16 +12564,13 @@ static SDValue tryAdvSIMDModImm32(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType1(Value))) {
Value = AArch64_AM::encodeAdvSIMDModImmType1(Value);
Shift = 0;
- }
- else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType2(Value))) {
+ } else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType2(Value))) {
Value = AArch64_AM::encodeAdvSIMDModImmType2(Value);
Shift = 8;
- }
- else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType3(Value))) {
+ } else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType3(Value))) {
Value = AArch64_AM::encodeAdvSIMDModImmType3(Value);
Shift = 16;
- }
- else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType4(Value))) {
+ } else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType4(Value))) {
Value = AArch64_AM::encodeAdvSIMDModImmType4(Value);
Shift = 24;
}
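Types 1-4 handled above are the 32-bit "byte placed in one of the four byte lanes" immediates (LSL #0/#8/#16/#24). A small standalone check of the same property, using a hypothetical helper name rather than the AArch64_AM API:

  #include <cassert>
  #include <cstdint>

  // True if V is an 8-bit value sitting entirely in byte lane 0, 1, 2 or 3,
  // i.e. representable as an 8-bit immediate with LSL #0/#8/#16/#24.
  bool isShiftedByteImm(uint32_t V, uint8_t &Byte, unsigned &Shift) {
    for (Shift = 0; Shift < 32; Shift += 8)
      if ((V & ~(0xFFu << Shift)) == 0) {
        Byte = uint8_t(V >> Shift);
        return true;
      }
    return false;
  }

  int main() {
    uint8_t B; unsigned S;
    assert(isShiftedByteImm(0x0000AB00u, B, S) && B == 0xAB && S == 8);
    assert(!isShiftedByteImm(0x00012300u, B, S)); // spans two byte lanes
  }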
@@ -12544,9 +12585,9 @@ static SDValue tryAdvSIMDModImm32(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
DAG.getConstant(Value, dl, MVT::i32),
DAG.getConstant(Shift, dl, MVT::i32));
else
- Mov = DAG.getNode(NewOp, dl, MovTy,
- DAG.getConstant(Value, dl, MVT::i32),
- DAG.getConstant(Shift, dl, MVT::i32));
+ Mov =
+ DAG.getNode(NewOp, dl, MovTy, DAG.getConstant(Value, dl, MVT::i32),
+ DAG.getConstant(Shift, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
@@ -12573,8 +12614,7 @@ static SDValue tryAdvSIMDModImm16(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType5(Value))) {
Value = AArch64_AM::encodeAdvSIMDModImmType5(Value);
Shift = 0;
- }
- else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType6(Value))) {
+ } else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType6(Value))) {
Value = AArch64_AM::encodeAdvSIMDModImmType6(Value);
Shift = 8;
}
@@ -12589,9 +12629,9 @@ static SDValue tryAdvSIMDModImm16(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
DAG.getConstant(Value, dl, MVT::i32),
DAG.getConstant(Shift, dl, MVT::i32));
else
- Mov = DAG.getNode(NewOp, dl, MovTy,
- DAG.getConstant(Value, dl, MVT::i32),
- DAG.getConstant(Shift, dl, MVT::i32));
+ Mov =
+ DAG.getNode(NewOp, dl, MovTy, DAG.getConstant(Value, dl, MVT::i32),
+ DAG.getConstant(Shift, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
@@ -12613,17 +12653,16 @@ static SDValue tryAdvSIMDModImm321s(unsigned NewOp, SDValue Op,
if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType7(Value))) {
Value = AArch64_AM::encodeAdvSIMDModImmType7(Value);
Shift = 264;
- }
- else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType8(Value))) {
+ } else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType8(Value))) {
Value = AArch64_AM::encodeAdvSIMDModImmType8(Value);
Shift = 272;
}
if (isAdvSIMDModImm) {
SDLoc dl(Op);
- SDValue Mov = DAG.getNode(NewOp, dl, MovTy,
- DAG.getConstant(Value, dl, MVT::i32),
- DAG.getConstant(Shift, dl, MVT::i32));
+ SDValue Mov =
+ DAG.getNode(NewOp, dl, MovTy, DAG.getConstant(Value, dl, MVT::i32),
+ DAG.getConstant(Shift, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
}
@@ -12643,8 +12682,8 @@ static SDValue tryAdvSIMDModImm8(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
Value = AArch64_AM::encodeAdvSIMDModImmType9(Value);
SDLoc dl(Op);
- SDValue Mov = DAG.getNode(NewOp, dl, MovTy,
- DAG.getConstant(Value, dl, MVT::i32));
+ SDValue Mov =
+ DAG.getNode(NewOp, dl, MovTy, DAG.getConstant(Value, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
}
@@ -12665,17 +12704,16 @@ static SDValue tryAdvSIMDModImmFP(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType11(Value))) {
Value = AArch64_AM::encodeAdvSIMDModImmType11(Value);
MovTy = isWide ? MVT::v4f32 : MVT::v2f32;
- }
- else if (isWide &&
- (isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType12(Value))) {
+ } else if (isWide &&
+ (isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType12(Value))) {
Value = AArch64_AM::encodeAdvSIMDModImmType12(Value);
MovTy = MVT::v2f64;
}
if (isAdvSIMDModImm) {
SDLoc dl(Op);
- SDValue Mov = DAG.getNode(NewOp, dl, MovTy,
- DAG.getConstant(Value, dl, MVT::i32));
+ SDValue Mov =
+ DAG.getNode(NewOp, dl, MovTy, DAG.getConstant(Value, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
}
@@ -12887,16 +12925,15 @@ SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op,
if (resolveBuildVector(BVN, DefBits, UndefBits)) {
SDValue NewOp;
- if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::ORRi, Op, DAG,
- DefBits, &LHS)) ||
- (NewOp = tryAdvSIMDModImm16(AArch64ISD::ORRi, Op, DAG,
- DefBits, &LHS)))
+ if ((NewOp =
+ tryAdvSIMDModImm32(AArch64ISD::ORRi, Op, DAG, DefBits, &LHS)) ||
+ (NewOp = tryAdvSIMDModImm16(AArch64ISD::ORRi, Op, DAG, DefBits, &LHS)))
return NewOp;
- if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::ORRi, Op, DAG,
- UndefBits, &LHS)) ||
- (NewOp = tryAdvSIMDModImm16(AArch64ISD::ORRi, Op, DAG,
- UndefBits, &LHS)))
+ if ((NewOp =
+ tryAdvSIMDModImm32(AArch64ISD::ORRi, Op, DAG, UndefBits, &LHS)) ||
+ (NewOp =
+ tryAdvSIMDModImm16(AArch64ISD::ORRi, Op, DAG, UndefBits, &LHS)))
return NewOp;
}
@@ -12906,12 +12943,11 @@ SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op,
// Normalize the operands of BUILD_VECTOR. The value of constant operands will
// be truncated to fit element width.
-static SDValue NormalizeBuildVector(SDValue Op,
- SelectionDAG &DAG) {
+static SDValue NormalizeBuildVector(SDValue Op, SelectionDAG &DAG) {
assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
SDLoc dl(Op);
EVT VT = Op.getValueType();
- EVT EltTy= VT.getVectorElementType();
+ EVT EltTy = VT.getVectorElementType();
if (EltTy.isFloatingPoint() || EltTy.getSizeInBits() > 16)
return Op;
@@ -12923,8 +12959,7 @@ static SDValue NormalizeBuildVector(SDValue Op,
// (with operands cast to integers), then the only possibilities
// are constants and UNDEFs.
if (auto *CstLane = dyn_cast<ConstantSDNode>(Lane)) {
- APInt LowBits(EltTy.getSizeInBits(),
- CstLane->getZExtValue());
+ APInt LowBits(EltTy.getSizeInBits(), CstLane->getZExtValue());
Lane = DAG.getConstant(LowBits.getZExtValue(), dl, MVT::i32);
} else if (Lane.getNode()->isUndef()) {
Lane = DAG.getUNDEF(MVT::i32);
@@ -13206,8 +13241,9 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
if (VT.getVectorElementType().isFloatingPoint()) {
SmallVector<SDValue, 8> Ops;
EVT EltTy = VT.getVectorElementType();
- assert ((EltTy == MVT::f16 || EltTy == MVT::bf16 || EltTy == MVT::f32 ||
- EltTy == MVT::f64) && "Unsupported floating-point vector type");
+ assert((EltTy == MVT::f16 || EltTy == MVT::bf16 || EltTy == MVT::f32 ||
+ EltTy == MVT::f64) &&
+ "Unsupported floating-point vector type");
LLVM_DEBUG(
dbgs() << "LowerBUILD_VECTOR: float constant splats, creating int "
"BITCASTS, and try again\n");
@@ -13812,8 +13848,9 @@ static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
APInt SplatBits, SplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
- if (!BVN || !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
- HasAnyUndefs, ElementBits) ||
+ if (!BVN ||
+ !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
+ ElementBits) ||
SplatBitSize > ElementBits)
return false;
Cnt = SplatBits.getSExtValue();
@@ -13931,10 +13968,10 @@ SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
if (isVShiftLImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize)
return DAG.getNode(AArch64ISD::VSHL, DL, VT, Op.getOperand(0),
DAG.getConstant(Cnt, DL, MVT::i32));
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
- DAG.getConstant(Intrinsic::aarch64_neon_ushl, DL,
- MVT::i32),
- Op.getOperand(0), Op.getOperand(1));
+ return DAG.getNode(
+ ISD::INTRINSIC_WO_CHAIN, DL, VT,
+ DAG.getConstant(Intrinsic::aarch64_neon_ushl, DL, MVT::i32),
+ Op.getOperand(0), Op.getOperand(1));
case ISD::SRA:
case ISD::SRL:
if (VT.isScalableVector() && Subtarget->hasSVE2orSME()) {
@@ -13969,10 +14006,9 @@ SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
// negate the shift amount
SDValue NegShift = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
Op.getOperand(1));
- SDValue NegShiftLeft =
- DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
- DAG.getConstant(Opc, DL, MVT::i32), Op.getOperand(0),
- NegShift);
+ SDValue NegShiftLeft = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
+ DAG.getConstant(Opc, DL, MVT::i32),
+ Op.getOperand(0), NegShift);
return NegShiftLeft;
}
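The negation above relies on NEON's USHL/SSHL convention: a non-negative per-element shift amount shifts left, a negative one shifts right, so a variable-amount SRL/SRA becomes a shift-left by the negated amount. A scalar model of one unsigned lane (sketch only, ignoring out-of-range amounts):

  #include <cassert>
  #include <cstdint>

  // Left shift for s >= 0, logical right shift for s < 0 (the real
  // instruction reads the amount from the bottom byte of each lane).
  uint32_t ushlLane(uint32_t x, int8_t s) {
    return s >= 0 ? x << s : x >> uint8_t(-s);
  }

  int main() {
    assert(ushlLane(0xF0u, -3) == 0x1Eu); // x >> 3 expressed as ushl(x, -3)
    assert(ushlLane(0x01u, 4) == 0x10u);
  }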
@@ -14130,7 +14166,7 @@ SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op,
}
assert((!FullFP16 && LHS.getValueType().getVectorElementType() != MVT::f16) ||
- LHS.getValueType().getVectorElementType() != MVT::f128);
+ LHS.getValueType().getVectorElementType() != MVT::f128);
// Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
// clean. Some of them require two branches to implement.
@@ -14138,15 +14174,14 @@ SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op,
bool ShouldInvert;
changeVectorFPCCToAArch64CC(CC, CC1, CC2, ShouldInvert);
- bool NoNaNs = getTargetMachine().Options.NoNaNsFPMath || Op->getFlags().hasNoNaNs();
- SDValue Cmp =
- EmitVectorComparison(LHS, RHS, CC1, NoNaNs, CmpVT, dl, DAG);
+ bool NoNaNs =
+ getTargetMachine().Options.NoNaNsFPMath || Op->getFlags().hasNoNaNs();
+ SDValue Cmp = EmitVectorComparison(LHS, RHS, CC1, NoNaNs, CmpVT, dl, DAG);
if (!Cmp.getNode())
return SDValue();
if (CC2 != AArch64CC::AL) {
- SDValue Cmp2 =
- EmitVectorComparison(LHS, RHS, CC2, NoNaNs, CmpVT, dl, DAG);
+ SDValue Cmp2 = EmitVectorComparison(LHS, RHS, CC2, NoNaNs, CmpVT, dl, DAG);
if (!Cmp2.getNode())
return SDValue();
@@ -14392,8 +14427,8 @@ AArch64TargetLowering::LowerWindowsDYNAMIC_STACKALLOC(SDValue Op,
Chain = DAG.getCALLSEQ_START(Chain, 0, 0, dl);
EVT PtrVT = getPointerTy(DAG.getDataLayout());
- SDValue Callee = DAG.getTargetExternalSymbol(Subtarget->getChkStkName(),
- PtrVT, 0);
+ SDValue Callee =
+ DAG.getTargetExternalSymbol(Subtarget->getChkStkName(), PtrVT, 0);
const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
const uint32_t *Mask = TRI->getWindowsStackProbePreservedMask();
@@ -14761,7 +14796,7 @@ bool AArch64TargetLowering::shouldReduceLoadWidth(SDNode *Load,
// The shift can be combined if it matches the size of the value being
// loaded (and so reducing the width would make it not match).
uint64_t ShiftAmount = Base.getOperand(1).getConstantOperandVal(1);
- uint64_t LoadBytes = Mem->getMemoryVT().getSizeInBits()/8;
+ uint64_t LoadBytes = Mem->getMemoryVT().getSizeInBits() / 8;
if (ShiftAmount == Log2_32(LoadBytes))
return false;
}
@@ -14822,10 +14857,10 @@ bool AArch64TargetLowering::isProfitableToHoist(Instruction *I) const {
const DataLayout &DL = F->getParent()->getDataLayout();
Type *Ty = User->getOperand(0)->getType();
- return !(isFMAFasterThanFMulAndFAdd(*F, Ty) &&
- isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
- (Options.AllowFPOpFusion == FPOpFusion::Fast ||
- Options.UnsafeFPMath));
+ return !(
+ isFMAFasterThanFMulAndFAdd(*F, Ty) &&
+ isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
+ (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));
}
// All 32-bit GPR operations implicitly zero the high-half of the corresponding
@@ -14884,7 +14919,7 @@ bool AArch64TargetLowering::isExtFreeImpl(const Instruction *Ext) const {
case Instruction::GetElementPtr: {
gep_type_iterator GTI = gep_type_begin(Instr);
auto &DL = Ext->getModule()->getDataLayout();
- std::advance(GTI, U.getOperandNo()-1);
+ std::advance(GTI, U.getOperandNo() - 1);
Type *IdxTy = GTI.getIndexedType();
// This extension will end up with a shift because of the scaling factor.
// 8-bit sized types have a scaling factor of 1, thus a shift amount of 0.
@@ -15231,7 +15266,8 @@ bool AArch64TargetLowering::shouldSinkOperands(
I->getParent() != IB->getParent())
return false;
- Ops.push_back(&MainAnd->getOperandUse(MainAnd->getOperand(0) == IA ? 1 : 0));
+ Ops.push_back(
+ &MainAnd->getOperandUse(MainAnd->getOperand(0) == IA ? 1 : 0));
Ops.push_back(&I->getOperandUse(0));
Ops.push_back(&I->getOperandUse(1));
@@ -16257,8 +16293,8 @@ bool AArch64TargetLowering::isLegalAddImmediate(int64_t Immed) const {
}
// Same encoding for add/sub, just flip the sign.
Immed = std::abs(Immed);
- bool IsLegal = ((Immed >> 12) == 0 ||
- ((Immed & 0xfff) == 0 && Immed >> 24 == 0));
+ bool IsLegal =
+ ((Immed >> 12) == 0 || ((Immed & 0xfff) == 0 && Immed >> 24 == 0));
LLVM_DEBUG(dbgs() << "Is " << Immed
<< " legal add imm: " << (IsLegal ? "yes" : "no") << "\n");
return IsLegal;
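The predicate mirrors the ADD/SUB (immediate) encoding: an unsigned 12-bit value, optionally shifted left by 12. A standalone sketch of the same check with a few spot values (the INT64_MIN guard is added here only to keep std::abs well-defined):

  #include <cassert>
  #include <cstdint>
  #include <cstdlib>

  // Legal if the magnitude is imm12, or imm12 << 12.
  bool isLegalAddImm(int64_t Immed) {
    if (Immed == INT64_MIN)
      return false;
    Immed = std::abs(Immed);
    return (Immed >> 12) == 0 ||
           ((Immed & 0xfff) == 0 && (Immed >> 24) == 0);
  }

  int main() {
    assert(isLegalAddImm(4095));      // fits in 12 bits
    assert(isLegalAddImm(0x123000));  // a 12-bit value shifted by 12
    assert(!isLegalAddImm(4097));     // needs bits in both halves
    assert(!isLegalAddImm(1 << 24));  // too wide even when shifted
  }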
@@ -16302,8 +16338,9 @@ bool AArch64TargetLowering::isLegalICmpImmediate(int64_t Immed) const {
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
bool AArch64TargetLowering::isLegalAddressingMode(const DataLayout &DL,
- const AddrMode &AMode, Type *Ty,
- unsigned AS, Instruction *I) const {
+ const AddrMode &AMode,
+ Type *Ty, unsigned AS,
+ Instruction *I) const {
// AArch64 has five basic addressing modes:
// reg
// reg + 9-bit signed offset
@@ -16424,9 +16461,8 @@ AArch64TargetLowering::getScratchRegisters(CallingConv::ID) const {
// LR is a callee-save register, but we must treat it as clobbered by any call
// site. Hence we include LR in the scratch registers, which are in turn added
// as implicit-defs for stackmaps and patchpoints.
- static const MCPhysReg ScratchRegs[] = {
- AArch64::X16, AArch64::X17, AArch64::LR, 0
- };
+ static const MCPhysReg ScratchRegs[] = {AArch64::X16, AArch64::X17,
+ AArch64::LR, 0};
return ScratchRegs;
}
@@ -16435,9 +16471,8 @@ ArrayRef<MCPhysReg> AArch64TargetLowering::getRoundingControlRegisters() const {
return RCRegs;
}
-bool
-AArch64TargetLowering::isDesirableToCommuteWithShift(const SDNode *N,
- CombineLevel Level) const {
+bool AArch64TargetLowering::isDesirableToCommuteWithShift(
+ const SDNode *N, CombineLevel Level) const {
assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
N->getOpcode() == ISD::SRL) &&
"Expected shift op");
@@ -16886,7 +16921,7 @@ AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
SmallVectorImpl<SDNode *> &Created) const {
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
if (isIntDivCheap(N->getValueType(0), Attr))
- return SDValue(N,0); // Lower SDIV as SDIV
+ return SDValue(N, 0); // Lower SDIV as SDIV
EVT VT = N->getValueType(0);
@@ -16958,7 +16993,7 @@ AArch64TargetLowering::BuildSREMPow2(SDNode *N, const APInt &Divisor,
}
static std::optional<unsigned> IsSVECntIntrinsic(SDValue S) {
- switch(getIntrinsicID(S.getNode())) {
+ switch (getIntrinsicID(S.getNode())) {
default:
break;
case Intrinsic::aarch64_sve_cntb:
@@ -17193,11 +17228,10 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
// Allow the scaling to be folded into the `cnt` instruction by preventing
// the scaling to be obscured here. This makes it easier to pattern match.
- if (IsSVECntIntrinsic(N0) ||
- (N0->getOpcode() == ISD::TRUNCATE &&
- (IsSVECntIntrinsic(N0->getOperand(0)))))
- if (ConstValue.sge(1) && ConstValue.sle(16))
- return SDValue();
+ if (IsSVECntIntrinsic(N0) || (N0->getOpcode() == ISD::TRUNCATE &&
+ (IsSVECntIntrinsic(N0->getOperand(0)))))
+ if (ConstValue.sge(1) && ConstValue.sle(16))
+ return SDValue();
// Multiplication of a power of two plus/minus one can be done more
// cheaply as shift+add/sub. For now, this is true unilaterally. If
@@ -17215,8 +17249,7 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
if (TrailingZeroes) {
// Conservatively do not lower to shift+add+shift if the mul might be
// folded into smul or umul.
- if (N0->hasOneUse() && (isSignExtended(N0, DAG) ||
- isZeroExtended(N0, DAG)))
+ if (N0->hasOneUse() && (isSignExtended(N0, DAG) || isZeroExtended(N0, DAG)))
return SDValue();
// Conservatively do not lower to shift+add+shift if the mul might be
// folded into madd or msub.
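For the "power of two plus/minus one" case mentioned a few lines up, the strength reduction is the familiar one, e.g. 9*x = (x << 3) + x (a single add with a shifted operand) and 7*x = (x << 3) - x. A quick scalar check:

  #include <cassert>
  #include <cstdint>

  int64_t mulBy9(int64_t x) { return (x << 3) + x; } // 9 = 2^3 + 1
  int64_t mulBy7(int64_t x) { return (x << 3) - x; } // 7 = 2^3 - 1

  int main() {
    for (int64_t x : {0, 1, 5, 1000}) {
      assert(mulBy9(x) == 9 * x);
      assert(mulBy7(x) == 7 * x);
    }
  }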
@@ -17393,8 +17426,8 @@ static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG,
// to use the new Chain.
DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), Load.getValue(1));
- unsigned Opcode =
- (N->getOpcode() == ISD::SINT_TO_FP) ? AArch64ISD::SITOF : AArch64ISD::UITOF;
+ unsigned Opcode = (N->getOpcode() == ISD::SINT_TO_FP) ? AArch64ISD::SITOF
+ : AArch64ISD::UITOF;
return DAG.getNode(Opcode, SDLoc(N), VT, Load);
}
@@ -17507,7 +17540,8 @@ static SDValue performFDivCombine(SDNode *N, SelectionDAG &DAG,
BitVector UndefElements;
BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
- int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, FloatBits + 1);
+ int32_t C =
+ BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, FloatBits + 1);
if (C == -1 || C == 0 || C > FloatBits)
return SDValue();
@@ -17819,8 +17853,8 @@ static SDValue performSVEAndCombine(SDNode *N,
Dup = DAG.getNode(ISD::SPLAT_VECTOR, DL, UnpkOp->getValueType(0),
DAG.getConstant(Mask.zextOrTrunc(32), DL, MVT::i32));
- SDValue And = DAG.getNode(ISD::AND, DL,
- UnpkOp->getValueType(0), UnpkOp, Dup);
+ SDValue And =
+ DAG.getNode(ISD::AND, DL, UnpkOp->getValueType(0), UnpkOp, Dup);
return DAG.getNode(Opc, DL, N->getValueType(0), And);
}
@@ -17930,7 +17964,7 @@ static SDValue performANDCombine(SDNode *N,
if (SDValue R = performANDORCSELCombine(N, DAG))
return R;
- if (SDValue R = performANDSETCCCombine(N,DCI))
+ if (SDValue R = performANDSETCCCombine(N, DCI))
return R;
if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
@@ -18176,8 +18210,7 @@ performExtractVectorEltCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
// with the strict_fadd, but we also need uses of the chain output of the
// original strict_fadd to use the chain output of the new strict_fadd as
// otherwise it may not be deleted.
- SDValue Ret = DAG.getNode(N0->getOpcode(), DL,
- {VT, MVT::Other},
+ SDValue Ret = DAG.getNode(N0->getOpcode(), DL, {VT, MVT::Other},
{N0->getOperand(0), Extract1, Extract2});
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Ret);
DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Ret.getValue(1));
@@ -18254,9 +18287,8 @@ static SDValue performConcatVectorsCombine(SDNode *N,
Ops.push_back(DAG.getUNDEF(MVT::f32));
else {
LoadSDNode *LD = cast<LoadSDNode>(V);
- SDValue NewLoad =
- DAG.getLoad(MVT::f32, dl, LD->getChain(), LD->getBasePtr(),
- LD->getMemOperand());
+ SDValue NewLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(),
+ LD->getBasePtr(), LD->getMemOperand());
DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLoad.getValue(1));
Ops.push_back(NewLoad);
}
@@ -18670,11 +18702,11 @@ static bool isSetCC(SDValue Op, SetCCInfoAndKind &SetCCInfo) {
}
// Returns true if Op is setcc or zext of setcc.
-static bool isSetCCOrZExtSetCC(const SDValue& Op, SetCCInfoAndKind &Info) {
+static bool isSetCCOrZExtSetCC(const SDValue &Op, SetCCInfoAndKind &Info) {
if (isSetCC(Op, Info))
return true;
return ((Op.getOpcode() == ISD::ZERO_EXTEND) &&
- isSetCC(Op->getOperand(0), Info));
+ isSetCC(Op->getOperand(0), Info));
}
// The folding we want to perform is:
@@ -19097,7 +19129,8 @@ static SDValue performBuildVectorCombine(SDNode *N,
if (!DAG.getTargetLoweringInfo().isTypeLegal(ExtVT))
return SDValue();
- SDValue SubvectorIdx = DAG.getVectorIdxConstant(Elt0->getConstantOperandVal(1), DL);
+ SDValue SubvectorIdx =
+ DAG.getVectorIdxConstant(Elt0->getConstantOperandVal(1), DL);
SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, DL, ExtVT, VecToExtend);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i32, Ext,
@@ -19107,8 +19140,7 @@ static SDValue performBuildVectorCombine(SDNode *N,
return SDValue();
}
-static SDValue performTruncateCombine(SDNode *N,
- SelectionDAG &DAG) {
+static SDValue performTruncateCombine(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0);
if (VT.isFixedLengthVector() && VT.is64BitVector() && N0.hasOneUse() &&
@@ -19359,8 +19391,10 @@ static bool isLoadOrMultipleLoads(SDValue B, SmallVector<LoadSDNode *> &Loads) {
// are lowered. Note that this only comes up because we do not always visit
// operands before uses. After that is fixed this can be removed and in the
// meantime this is fairly specific to the lowering we expect from IR.
- // t46: v16i8 = vector_shuffle<0,1,2,3,4,5,6,7,8,9,10,11,16,17,18,19> t44, t45
- // t44: v16i8 = vector_shuffle<0,1,2,3,4,5,6,7,16,17,18,19,u,u,u,u> t42, t43
+ // t46: v16i8 = vector_shuffle<0,1,2,3,4,5,6,7,8,9,10,11,16,17,18,19> t44,
+ // t45
+ // t44: v16i8 = vector_shuffle<0,1,2,3,4,5,6,7,16,17,18,19,u,u,u,u> t42,
+ // t43
// t42: v16i8 = concat_vectors t40, t36, undef:v4i8, undef:v4i8
// t40: v4i8,ch = load<(load (s32) from %ir.17)> t0, t22, undef:i64
// t36: v4i8,ch = load<(load (s32) from %ir.13)> t0, t18, undef:i64
@@ -19759,8 +19793,7 @@ static SDValue combineAcrossLanesIntrinsic(unsigned Opc, SDNode *N,
SelectionDAG &DAG) {
SDLoc dl(N);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, N->getValueType(0),
- DAG.getNode(Opc, dl,
- N->getOperand(1).getSimpleValueType(),
+ DAG.getNode(Opc, dl, N->getOperand(1).getSimpleValueType(),
N->getOperand(1)),
DAG.getConstant(0, dl, MVT::i64));
}
@@ -19862,7 +19895,7 @@ static SDValue tryConvertSVEWideCompare(SDNode *N, ISD::CondCode CC,
case Intrinsic::aarch64_sve_cmphs_wide:
case Intrinsic::aarch64_sve_cmphi_wide:
case Intrinsic::aarch64_sve_cmplo_wide:
- case Intrinsic::aarch64_sve_cmpls_wide: {
+ case Intrinsic::aarch64_sve_cmpls_wide: {
if (auto *CN = dyn_cast<ConstantSDNode>(Comparator.getOperand(0))) {
uint64_t ImmVal = CN->getZExtValue();
if (ImmVal <= 127)
@@ -20428,9 +20461,8 @@ static SDValue splitStoreSplat(SelectionDAG &DAG, StoreSDNode &St,
uint64_t BaseOffset = 0;
const MachinePointerInfo &PtrInfo = St.getPointerInfo();
- SDValue NewST1 =
- DAG.getStore(St.getChain(), DL, SplatVal, BasePtr, PtrInfo,
- OrigAlignment, St.getMemOperand()->getFlags());
+ SDValue NewST1 = DAG.getStore(St.getChain(), DL, SplatVal, BasePtr, PtrInfo,
+ OrigAlignment, St.getMemOperand()->getFlags());
// As this in ISel, we will not merge this add which may degrade results.
if (BasePtr->getOpcode() == ISD::ADD &&
@@ -20495,10 +20527,10 @@ static SDValue performLD1Combine(SDNode *N, SelectionDAG &DAG, unsigned Opc) {
ContainerVT = getSVEContainerType(ContainerVT);
SDVTList VTs = DAG.getVTList(ContainerVT, MVT::Other);
- SDValue Ops[] = { N->getOperand(0), // Chain
- N->getOperand(2), // Pg
- N->getOperand(3), // Base
- DAG.getValueType(VT) };
+ SDValue Ops[] = {N->getOperand(0), // Chain
+ N->getOperand(2), // Pg
+ N->getOperand(3), // Base
+ DAG.getValueType(VT)};
SDValue Load = DAG.getNode(Opc, DL, VTs, Ops);
SDValue LoadChain = SDValue(Load.getNode(), 1);
@@ -20506,7 +20538,7 @@ static SDValue performLD1Combine(SDNode *N, SelectionDAG &DAG, unsigned Opc) {
if (ContainerVT.isInteger() && (VT != ContainerVT))
Load = DAG.getNode(ISD::TRUNCATE, DL, VT, Load.getValue(0));
- return DAG.getMergeValues({ Load, LoadChain }, DL);
+ return DAG.getMergeValues({Load, LoadChain}, DL);
}
static SDValue performLDNT1Combine(SDNode *N, SelectionDAG &DAG) {
@@ -20520,16 +20552,16 @@ static SDValue performLDNT1Combine(SDNode *N, SelectionDAG &DAG) {
auto *MINode = cast<MemIntrinsicSDNode>(N);
SDValue PassThru = DAG.getConstant(0, DL, LoadVT);
- SDValue L = DAG.getMaskedLoad(LoadVT, DL, MINode->getChain(),
- MINode->getOperand(3), DAG.getUNDEF(PtrTy),
- MINode->getOperand(2), PassThru,
- MINode->getMemoryVT(), MINode->getMemOperand(),
- ISD::UNINDEXED, ISD::NON_EXTLOAD, false);
+ SDValue L =
+ DAG.getMaskedLoad(LoadVT, DL, MINode->getChain(), MINode->getOperand(3),
+ DAG.getUNDEF(PtrTy), MINode->getOperand(2), PassThru,
+ MINode->getMemoryVT(), MINode->getMemOperand(),
+ ISD::UNINDEXED, ISD::NON_EXTLOAD, false);
- if (VT.isFloatingPoint()) {
- SDValue Ops[] = { DAG.getNode(ISD::BITCAST, DL, VT, L), L.getValue(1) };
- return DAG.getMergeValues(Ops, DL);
- }
+ if (VT.isFloatingPoint()) {
+ SDValue Ops[] = {DAG.getNode(ISD::BITCAST, DL, VT, L), L.getValue(1)};
+ return DAG.getMergeValues(Ops, DL);
+ }
return L;
}
@@ -20572,12 +20604,11 @@ static SDValue performST1Combine(SDNode *N, SelectionDAG &DAG) {
else
SrcNew = DAG.getNode(ISD::ANY_EXTEND, DL, HwSrcVt, Data);
- SDValue Ops[] = { N->getOperand(0), // Chain
- SrcNew,
- N->getOperand(4), // Base
- N->getOperand(3), // Pg
- InputVT
- };
+ SDValue Ops[] = {N->getOperand(0), // Chain
+ SrcNew,
+ N->getOperand(4), // Base
+ N->getOperand(3), // Pg
+ InputVT};
return DAG.getNode(AArch64ISD::ST1_PRED, DL, N->getValueType(0), Ops);
}
@@ -20729,7 +20760,7 @@ static SDValue replaceSplatVectorStore(SelectionDAG &DAG, StoreSDNode &St) {
}
// Check that all vector element locations were inserted to.
if (IndexNotInserted.any())
- return SDValue();
+ return SDValue();
return splitStoreSplat(DAG, St, SplatVal, NumVecElts);
}
@@ -21250,11 +21281,12 @@ static SDValue performPostLD1Combine(SDNode *N,
SDValue Addr = LD->getOperand(1);
SDValue Vector = N->getOperand(0);
// Search for a use of the address operand that is an increment.
- for (SDNode::use_iterator UI = Addr.getNode()->use_begin(), UE =
- Addr.getNode()->use_end(); UI != UE; ++UI) {
+ for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
+ UE = Addr.getNode()->use_end();
+ UI != UE; ++UI) {
SDNode *User = *UI;
- if (User->getOpcode() != ISD::ADD
- || UI.getUse().getResNo() != Addr.getResNo())
+ if (User->getOpcode() != ISD::ADD ||
+ UI.getUse().getResNo() != Addr.getResNo())
continue;
// If the increment is a constant, it must match the memory ref size.
@@ -21280,19 +21312,19 @@ static SDValue performPostLD1Combine(SDNode *N,
continue;
SmallVector<SDValue, 8> Ops;
- Ops.push_back(LD->getOperand(0)); // Chain
+ Ops.push_back(LD->getOperand(0)); // Chain
if (IsLaneOp) {
- Ops.push_back(Vector); // The vector to be inserted
- Ops.push_back(Lane); // The lane to be inserted in the vector
+ Ops.push_back(Vector); // The vector to be inserted
+ Ops.push_back(Lane); // The lane to be inserted in the vector
}
Ops.push_back(Addr);
Ops.push_back(Inc);
- EVT Tys[3] = { VT, MVT::i64, MVT::Other };
+ EVT Tys[3] = {VT, MVT::i64, MVT::Other};
SDVTList SDTys = DAG.getVTList(Tys);
- unsigned NewOp = IsLaneOp ? AArch64ISD::LD1LANEpost : AArch64ISD::LD1DUPpost;
- SDValue UpdN = DAG.getMemIntrinsicNode(NewOp, SDLoc(N), SDTys, Ops,
- MemVT,
+ unsigned NewOp =
+ IsLaneOp ? AArch64ISD::LD1LANEpost : AArch64ISD::LD1DUPpost;
+ SDValue UpdN = DAG.getMemIntrinsicNode(NewOp, SDLoc(N), SDTys, Ops, MemVT,
LoadSDN->getMemOperand());
// Update the uses.
@@ -21301,8 +21333,8 @@ static SDValue performPostLD1Combine(SDNode *N,
SDValue(UpdN.getNode(), 2) // Chain
};
DCI.CombineTo(LD, NewResults);
- DCI.CombineTo(N, SDValue(UpdN.getNode(), 0)); // Dup/Inserted Result
- DCI.CombineTo(User, SDValue(UpdN.getNode(), 1)); // Write back register
+ DCI.CombineTo(N, SDValue(UpdN.getNode(), 0)); // Dup/Inserted Result
+ DCI.CombineTo(User, SDValue(UpdN.getNode(), 1)); // Write back register
break;
}
@@ -21964,7 +21996,8 @@ static SDValue performNEONPostLDSTCombine(SDNode *N,
// Search for a use of the address operand that is an increment.
for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
- UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
+ UE = Addr.getNode()->use_end();
+ UI != UE; ++UI) {
SDNode *User = *UI;
if (User->getOpcode() != ISD::ADD ||
UI.getUse().getResNo() != Addr.getResNo())
@@ -21989,49 +22022,110 @@ static SDValue performNEONPostLDSTCombine(SDNode *N,
unsigned NumVecs = 0;
unsigned IntNo = N->getConstantOperandVal(1);
switch (IntNo) {
- default: llvm_unreachable("unexpected intrinsic for Neon base update");
- case Intrinsic::aarch64_neon_ld2: NewOpc = AArch64ISD::LD2post;
- NumVecs = 2; break;
- case Intrinsic::aarch64_neon_ld3: NewOpc = AArch64ISD::LD3post;
- NumVecs = 3; break;
- case Intrinsic::aarch64_neon_ld4: NewOpc = AArch64ISD::LD4post;
- NumVecs = 4; break;
- case Intrinsic::aarch64_neon_st2: NewOpc = AArch64ISD::ST2post;
- NumVecs = 2; IsStore = true; break;
- case Intrinsic::aarch64_neon_st3: NewOpc = AArch64ISD::ST3post;
- NumVecs = 3; IsStore = true; break;
- case Intrinsic::aarch64_neon_st4: NewOpc = AArch64ISD::ST4post;
- NumVecs = 4; IsStore = true; break;
- case Intrinsic::aarch64_neon_ld1x2: NewOpc = AArch64ISD::LD1x2post;
- NumVecs = 2; break;
- case Intrinsic::aarch64_neon_ld1x3: NewOpc = AArch64ISD::LD1x3post;
- NumVecs = 3; break;
- case Intrinsic::aarch64_neon_ld1x4: NewOpc = AArch64ISD::LD1x4post;
- NumVecs = 4; break;
- case Intrinsic::aarch64_neon_st1x2: NewOpc = AArch64ISD::ST1x2post;
- NumVecs = 2; IsStore = true; break;
- case Intrinsic::aarch64_neon_st1x3: NewOpc = AArch64ISD::ST1x3post;
- NumVecs = 3; IsStore = true; break;
- case Intrinsic::aarch64_neon_st1x4: NewOpc = AArch64ISD::ST1x4post;
- NumVecs = 4; IsStore = true; break;
- case Intrinsic::aarch64_neon_ld2r: NewOpc = AArch64ISD::LD2DUPpost;
- NumVecs = 2; IsDupOp = true; break;
- case Intrinsic::aarch64_neon_ld3r: NewOpc = AArch64ISD::LD3DUPpost;
- NumVecs = 3; IsDupOp = true; break;
- case Intrinsic::aarch64_neon_ld4r: NewOpc = AArch64ISD::LD4DUPpost;
- NumVecs = 4; IsDupOp = true; break;
- case Intrinsic::aarch64_neon_ld2lane: NewOpc = AArch64ISD::LD2LANEpost;
- NumVecs = 2; IsLaneOp = true; break;
- case Intrinsic::aarch64_neon_ld3lane: NewOpc = AArch64ISD::LD3LANEpost;
- NumVecs = 3; IsLaneOp = true; break;
- case Intrinsic::aarch64_neon_ld4lane: NewOpc = AArch64ISD::LD4LANEpost;
- NumVecs = 4; IsLaneOp = true; break;
- case Intrinsic::aarch64_neon_st2lane: NewOpc = AArch64ISD::ST2LANEpost;
- NumVecs = 2; IsStore = true; IsLaneOp = true; break;
- case Intrinsic::aarch64_neon_st3lane: NewOpc = AArch64ISD::ST3LANEpost;
- NumVecs = 3; IsStore = true; IsLaneOp = true; break;
- case Intrinsic::aarch64_neon_st4lane: NewOpc = AArch64ISD::ST4LANEpost;
- NumVecs = 4; IsStore = true; IsLaneOp = true; break;
+ default:
+ llvm_unreachable("unexpected intrinsic for Neon base update");
+ case Intrinsic::aarch64_neon_ld2:
+ NewOpc = AArch64ISD::LD2post;
+ NumVecs = 2;
+ break;
+ case Intrinsic::aarch64_neon_ld3:
+ NewOpc = AArch64ISD::LD3post;
+ NumVecs = 3;
+ break;
+ case Intrinsic::aarch64_neon_ld4:
+ NewOpc = AArch64ISD::LD4post;
+ NumVecs = 4;
+ break;
+ case Intrinsic::aarch64_neon_st2:
+ NewOpc = AArch64ISD::ST2post;
+ NumVecs = 2;
+ IsStore = true;
+ break;
+ case Intrinsic::aarch64_neon_st3:
+ NewOpc = AArch64ISD::ST3post;
+ NumVecs = 3;
+ IsStore = true;
+ break;
+ case Intrinsic::aarch64_neon_st4:
+ NewOpc = AArch64ISD::ST4post;
+ NumVecs = 4;
+ IsStore = true;
+ break;
+ case Intrinsic::aarch64_neon_ld1x2:
+ NewOpc = AArch64ISD::LD1x2post;
+ NumVecs = 2;
+ break;
+ case Intrinsic::aarch64_neon_ld1x3:
+ NewOpc = AArch64ISD::LD1x3post;
+ NumVecs = 3;
+ break;
+ case Intrinsic::aarch64_neon_ld1x4:
+ NewOpc = AArch64ISD::LD1x4post;
+ NumVecs = 4;
+ break;
+ case Intrinsic::aarch64_neon_st1x2:
+ NewOpc = AArch64ISD::ST1x2post;
+ NumVecs = 2;
+ IsStore = true;
+ break;
+ case Intrinsic::aarch64_neon_st1x3:
+ NewOpc = AArch64ISD::ST1x3post;
+ NumVecs = 3;
+ IsStore = true;
+ break;
+ case Intrinsic::aarch64_neon_st1x4:
+ NewOpc = AArch64ISD::ST1x4post;
+ NumVecs = 4;
+ IsStore = true;
+ break;
+ case Intrinsic::aarch64_neon_ld2r:
+ NewOpc = AArch64ISD::LD2DUPpost;
+ NumVecs = 2;
+ IsDupOp = true;
+ break;
+ case Intrinsic::aarch64_neon_ld3r:
+ NewOpc = AArch64ISD::LD3DUPpost;
+ NumVecs = 3;
+ IsDupOp = true;
+ break;
+ case Intrinsic::aarch64_neon_ld4r:
+ NewOpc = AArch64ISD::LD4DUPpost;
+ NumVecs = 4;
+ IsDupOp = true;
+ break;
+ case Intrinsic::aarch64_neon_ld2lane:
+ NewOpc = AArch64ISD::LD2LANEpost;
+ NumVecs = 2;
+ IsLaneOp = true;
+ break;
+ case Intrinsic::aarch64_neon_ld3lane:
+ NewOpc = AArch64ISD::LD3LANEpost;
+ NumVecs = 3;
+ IsLaneOp = true;
+ break;
+ case Intrinsic::aarch64_neon_ld4lane:
+ NewOpc = AArch64ISD::LD4LANEpost;
+ NumVecs = 4;
+ IsLaneOp = true;
+ break;
+ case Intrinsic::aarch64_neon_st2lane:
+ NewOpc = AArch64ISD::ST2LANEpost;
+ NumVecs = 2;
+ IsStore = true;
+ IsLaneOp = true;
+ break;
+ case Intrinsic::aarch64_neon_st3lane:
+ NewOpc = AArch64ISD::ST3LANEpost;
+ NumVecs = 3;
+ IsStore = true;
+ IsLaneOp = true;
+ break;
+ case Intrinsic::aarch64_neon_st4lane:
+ NewOpc = AArch64ISD::ST4LANEpost;
+ NumVecs = 4;
+ IsStore = true;
+ IsLaneOp = true;
+ break;
}
EVT VecTy;
@@ -22066,14 +22160,14 @@ static SDValue performNEONPostLDSTCombine(SDNode *N,
unsigned n;
for (n = 0; n < NumResultVecs; ++n)
Tys[n] = VecTy;
- Tys[n++] = MVT::i64; // Type of write back register
- Tys[n] = MVT::Other; // Type of the chain
+ Tys[n++] = MVT::i64; // Type of write back register
+ Tys[n] = MVT::Other; // Type of the chain
SDVTList SDTys = DAG.getVTList(ArrayRef(Tys, NumResultVecs + 2));
MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N);
- SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys, Ops,
- MemInt->getMemoryVT(),
- MemInt->getMemOperand());
+ SDValue UpdN =
+ DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys, Ops,
+ MemInt->getMemoryVT(), MemInt->getMemOperand());
// Update the uses.
std::vector<SDValue> NewResults;
@@ -22091,16 +22185,16 @@ static SDValue performNEONPostLDSTCombine(SDNode *N,
// Checks to see if the value is the prescribed width and returns information
// about its extension mode.
-static
-bool checkValueWidth(SDValue V, unsigned width, ISD::LoadExtType &ExtType) {
+static bool checkValueWidth(SDValue V, unsigned width,
+ ISD::LoadExtType &ExtType) {
ExtType = ISD::NON_EXTLOAD;
- switch(V.getNode()->getOpcode()) {
+ switch (V.getNode()->getOpcode()) {
default:
return false;
case ISD::LOAD: {
LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
- if ((LoadNode->getMemoryVT() == MVT::i8 && width == 8)
- || (LoadNode->getMemoryVT() == MVT::i16 && width == 16)) {
+ if ((LoadNode->getMemoryVT() == MVT::i8 && width == 8) ||
+ (LoadNode->getMemoryVT() == MVT::i16 && width == 16)) {
ExtType = LoadNode->getExtensionType();
return true;
}
@@ -22108,8 +22202,8 @@ bool checkValueWidth(SDValue V, unsigned width, ISD::LoadExtType &ExtType) {
}
case ISD::AssertSext: {
VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
- if ((TypeNode->getVT() == MVT::i8 && width == 8)
- || (TypeNode->getVT() == MVT::i16 && width == 16)) {
+ if ((TypeNode->getVT() == MVT::i8 && width == 8) ||
+ (TypeNode->getVT() == MVT::i16 && width == 16)) {
ExtType = ISD::SEXTLOAD;
return true;
}
@@ -22117,8 +22211,8 @@ bool checkValueWidth(SDValue V, unsigned width, ISD::LoadExtType &ExtType) {
}
case ISD::AssertZext: {
VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
- if ((TypeNode->getVT() == MVT::i8 && width == 8)
- || (TypeNode->getVT() == MVT::i16 && width == 16)) {
+ if ((TypeNode->getVT() == MVT::i8 && width == 8) ||
+ (TypeNode->getVT() == MVT::i16 && width == 16)) {
ExtType = ISD::ZEXTLOAD;
return true;
}
@@ -22209,9 +22303,9 @@ static bool isEquivalentMaskless(unsigned CC, unsigned width,
// the whole range we can just adjust the input and avoid writing equations
// for sign extended inputs.
if (ExtType == ISD::SEXTLOAD)
- AddConstant -= (1 << (width-1));
+ AddConstant -= (1 << (width - 1));
- switch(CC) {
+ switch (CC) {
case AArch64CC::LE:
case AArch64CC::GT:
if ((AddConstant == 0) ||
@@ -22222,22 +22316,20 @@ static bool isEquivalentMaskless(unsigned CC, unsigned width,
break;
case AArch64CC::LT:
case AArch64CC::GE:
- if ((AddConstant == 0) ||
- (AddConstant >= 0 && CompConstant <= 0) ||
+ if ((AddConstant == 0) || (AddConstant >= 0 && CompConstant <= 0) ||
(AddConstant <= 0 && CompConstant <= 0 && CompConstant <= AddConstant))
return true;
break;
case AArch64CC::HI:
case AArch64CC::LS:
if ((AddConstant >= 0 && CompConstant < 0) ||
- (AddConstant <= 0 && CompConstant >= -1 &&
- CompConstant < AddConstant + MaxUInt))
+ (AddConstant <= 0 && CompConstant >= -1 &&
+ CompConstant < AddConstant + MaxUInt))
return true;
- break;
+ break;
case AArch64CC::PL:
case AArch64CC::MI:
- if ((AddConstant == 0) ||
- (AddConstant > 0 && CompConstant <= 0) ||
+ if ((AddConstant == 0) || (AddConstant > 0 && CompConstant <= 0) ||
(AddConstant < 0 && CompConstant <= AddConstant))
return true;
break;
@@ -22318,11 +22410,10 @@ static SDValue performSubsToAndsCombine(SDNode *N, SDNode *SubsNode,
return DAG.getNode(N->getOpcode(), N, N->getVTList(), Ops);
}
-static
-SDValue performCONDCombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI,
- SelectionDAG &DAG, unsigned CCIndex,
- unsigned CmpIndex) {
+static SDValue performCONDCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ SelectionDAG &DAG, unsigned CCIndex,
+ unsigned CmpIndex) {
unsigned CC = cast<ConstantSDNode>(N->getOperand(CCIndex))->getSExtValue();
SDNode *SubsNode = N->getOperand(CmpIndex).getNode();
unsigned CondOpcode = SubsNode->getOpcode();
@@ -22376,19 +22467,20 @@ SDValue performCONDCombine(SDNode *N,
if (!checkValueWidth(SubsInputValue, MaskBits, ExtType) ||
!checkValueWidth(AddInputValue2, MaskBits, ExtType) ||
- !checkValueWidth(AddInputValue1, MaskBits, ExtType) )
+ !checkValueWidth(AddInputValue1, MaskBits, ExtType))
return SDValue();
- if(!isEquivalentMaskless(CC, MaskBits, ExtType,
- cast<ConstantSDNode>(AddInputValue2.getNode())->getSExtValue(),
- cast<ConstantSDNode>(SubsInputValue.getNode())->getSExtValue()))
+ if (!isEquivalentMaskless(
+ CC, MaskBits, ExtType,
+ cast<ConstantSDNode>(AddInputValue2.getNode())->getSExtValue(),
+ cast<ConstantSDNode>(SubsInputValue.getNode())->getSExtValue()))
return SDValue();
// The AND is not necessary, remove it.
- SDVTList VTs = DAG.getVTList(SubsNode->getValueType(0),
- SubsNode->getValueType(1));
- SDValue Ops[] = { AddValue, SubsNode->getOperand(1) };
+ SDVTList VTs =
+ DAG.getVTList(SubsNode->getValueType(0), SubsNode->getValueType(1));
+ SDValue Ops[] = {AddValue, SubsNode->getOperand(1)};
SDValue NewValue = DAG.getNode(CondOpcode, SDLoc(SubsNode), VTs, Ops);
DAG.ReplaceAllUsesWith(SubsNode, NewValue.getNode());
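The intuition for dropping the AND: once checkValueWidth has proved the compared value already fits in the masked width (e.g. it comes from a zero-extending i8 load), masking it changes neither the value nor any comparison against it; isEquivalentMaskless extends this to the add-plus-compare shapes. The simplest case, as a sanity check:

  #include <cassert>
  #include <cstdint>

  int main() {
    // A value known to come from a zero-extending 8-bit load.
    for (uint32_t v = 0; v < 256; ++v) {
      uint32_t masked = v & 0xffu;
      assert(masked == v);               // the AND is a no-op here...
      assert((masked < 42) == (v < 42)); // ...so any compare is unchanged
    }
  }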
@@ -22574,7 +22666,7 @@ static SDValue performCSELCombine(SDNode *N,
// CSEL 0, cttz(X), eq(X, 0) -> AND cttz bitwidth-1
// CSEL cttz(X), 0, ne(X, 0) -> AND cttz bitwidth-1
if (SDValue Folded = foldCSELofCTTZ(N, DAG))
- return Folded;
+ return Folded;
return performCONDCombine(N, DCI, DAG, 2, 3);
}
@@ -22702,8 +22794,8 @@ static SDValue performSETCCCombine(SDNode *N,
if (FromVT.isFixedLengthVector() &&
FromVT.getVectorElementType() == MVT::i1) {
bool IsNull = isNullConstant(RHS);
- LHS = DAG.getNode(IsNull ? ISD::VECREDUCE_OR : ISD::VECREDUCE_AND,
- DL, MVT::i1, LHS->getOperand(0));
+ LHS = DAG.getNode(IsNull ? ISD::VECREDUCE_OR : ISD::VECREDUCE_AND, DL,
+ MVT::i1, LHS->getOperand(0));
LHS = DAG.getNode(IsNull ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND, DL, ToVT,
LHS);
return DAG.getSetCC(DL, VT, LHS, RHS, Cond);
@@ -23032,8 +23124,7 @@ static SDValue performVSelectCombine(SDNode *N, SelectionDAG &DAG) {
SetCC = DAG.getSetCC(SDLoc(N), CmpVT.changeVectorElementTypeToInteger(),
N0.getOperand(0), N0.getOperand(1),
cast<CondCodeSDNode>(N0.getOperand(2))->get());
- return DAG.getNode(ISD::VSELECT, SDLoc(N), ResVT, SetCC,
- IfTrue, IfFalse);
+ return DAG.getNode(ISD::VSELECT, SDLoc(N), ResVT, SetCC, IfTrue, IfFalse);
}
/// A vector select: "(select vL, vR, (setcc LHS, RHS))" is best performed with
@@ -23090,17 +23181,15 @@ static SDValue performSelectCombine(SDNode *N,
// First perform a vector comparison, where lane 0 is the one we're interested
// in.
SDLoc DL(N0);
- SDValue LHS =
- DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, SrcVT, N0.getOperand(0));
- SDValue RHS =
- DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, SrcVT, N0.getOperand(1));
+ SDValue LHS = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, SrcVT, N0.getOperand(0));
+ SDValue RHS = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, SrcVT, N0.getOperand(1));
SDValue SetCC = DAG.getNode(ISD::SETCC, DL, CCVT, LHS, RHS, N0.getOperand(2));
// Now duplicate the comparison mask we want across all other lanes.
SmallVector<int, 8> DUPMask(CCVT.getVectorNumElements(), 0);
SDValue Mask = DAG.getVectorShuffle(CCVT, DL, SetCC, SetCC, DUPMask);
- Mask = DAG.getNode(ISD::BITCAST, DL,
- ResVT.changeVectorElementTypeToInteger(), Mask);
+ Mask = DAG.getNode(ISD::BITCAST, DL, ResVT.changeVectorElementTypeToInteger(),
+ Mask);
return DAG.getSelect(DL, ResVT, Mask, N->getOperand(1), N->getOperand(2));
}
@@ -23495,8 +23584,8 @@ performSignExtendInRegCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
// Sign extend of an unsigned unpack -> signed unpack
if (Opc == AArch64ISD::UUNPKHI || Opc == AArch64ISD::UUNPKLO) {
- unsigned SOpc = Opc == AArch64ISD::UUNPKHI ? AArch64ISD::SUNPKHI
- : AArch64ISD::SUNPKLO;
+ unsigned SOpc =
+ Opc == AArch64ISD::UUNPKHI ? AArch64ISD::SUNPKHI : AArch64ISD::SUNPKLO;
// Push the sign extend to the operand of the unpack
// This is necessary where, for example, the operand of the unpack
@@ -24727,12 +24816,12 @@ static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V) {
SDLoc dl(V.getNode());
auto [VLo, VHi] = DAG.SplitScalar(V, dl, MVT::i64, MVT::i64);
if (DAG.getDataLayout().isBigEndian())
- std::swap (VLo, VHi);
+ std::swap(VLo, VHi);
SDValue RegClass =
DAG.getTargetConstant(AArch64::XSeqPairsClassRegClassID, dl, MVT::i32);
SDValue SubReg0 = DAG.getTargetConstant(AArch64::sube64, dl, MVT::i32);
SDValue SubReg1 = DAG.getTargetConstant(AArch64::subo64, dl, MVT::i32);
- const SDValue Ops[] = { RegClass, VLo, SubReg0, VHi, SubReg1 };
+ const SDValue Ops[] = {RegClass, VLo, SubReg0, VHi, SubReg1};
return SDValue(
DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped, Ops), 0);
}
@@ -24751,8 +24840,8 @@ static void ReplaceCMP_SWAP_128Results(SDNode *N,
SDValue Ops[] = {
createGPRPairNode(DAG, N->getOperand(2)), // Compare value
createGPRPairNode(DAG, N->getOperand(3)), // Store value
- N->getOperand(1), // Ptr
- N->getOperand(0), // Chain in
+ N->getOperand(1), // Ptr
+ N->getOperand(0), // Chain in
};
unsigned Opcode;
@@ -25062,8 +25151,8 @@ void AArch64TargetLowering::ReplaceNodeResults(
if ((!LoadNode->isVolatile() && !LoadNode->isAtomic()) ||
LoadNode->getMemoryVT() != MVT::i128) {
- // Non-volatile or atomic loads are optimized later in AArch64's load/store
- // optimizer.
+ // Non-volatile or atomic loads are optimized later in AArch64's
+ // load/store optimizer.
return;
}
@@ -25112,30 +25201,30 @@ void AArch64TargetLowering::ReplaceNodeResults(
case Intrinsic::aarch64_sve_clasta_n: {
SDLoc DL(N);
auto Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, N->getOperand(2));
- auto V = DAG.getNode(AArch64ISD::CLASTA_N, DL, MVT::i32,
- N->getOperand(1), Op2, N->getOperand(3));
+ auto V = DAG.getNode(AArch64ISD::CLASTA_N, DL, MVT::i32, N->getOperand(1),
+ Op2, N->getOperand(3));
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
return;
}
case Intrinsic::aarch64_sve_clastb_n: {
SDLoc DL(N);
auto Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, N->getOperand(2));
- auto V = DAG.getNode(AArch64ISD::CLASTB_N, DL, MVT::i32,
- N->getOperand(1), Op2, N->getOperand(3));
+ auto V = DAG.getNode(AArch64ISD::CLASTB_N, DL, MVT::i32, N->getOperand(1),
+ Op2, N->getOperand(3));
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
return;
}
case Intrinsic::aarch64_sve_lasta: {
SDLoc DL(N);
- auto V = DAG.getNode(AArch64ISD::LASTA, DL, MVT::i32,
- N->getOperand(1), N->getOperand(2));
+ auto V = DAG.getNode(AArch64ISD::LASTA, DL, MVT::i32, N->getOperand(1),
+ N->getOperand(2));
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
return;
}
case Intrinsic::aarch64_sve_lastb: {
SDLoc DL(N);
- auto V = DAG.getNode(AArch64ISD::LASTB, DL, MVT::i32,
- N->getOperand(1), N->getOperand(2));
+ auto V = DAG.getNode(AArch64ISD::LASTB, DL, MVT::i32, N->getOperand(1),
+ N->getOperand(2));
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
return;
}
@@ -25426,7 +25515,7 @@ Value *AArch64TargetLowering::emitLoadLinked(IRBuilderBase &Builder,
Lo, Builder.CreateShl(Hi, ConstantInt::get(ValueTy, 64)), "val64");
}
- Type *Tys[] = { Addr->getType() };
+ Type *Tys[] = {Addr->getType()};
Intrinsic::ID Int =
IsAcquire ? Intrinsic::aarch64_ldaxr : Intrinsic::aarch64_ldxr;
Function *Ldxr = Intrinsic::getDeclaration(M, Int, Tys);
@@ -25469,11 +25558,12 @@ Value *AArch64TargetLowering::emitStoreConditional(IRBuilderBase &Builder,
Intrinsic::ID Int =
IsRelease ? Intrinsic::aarch64_stlxr : Intrinsic::aarch64_stxr;
- Type *Tys[] = { Addr->getType() };
+ Type *Tys[] = {Addr->getType()};
Function *Stxr = Intrinsic::getDeclaration(M, Int, Tys);
const DataLayout &DL = M->getDataLayout();
- IntegerType *IntValTy = Builder.getIntNTy(DL.getTypeSizeInBits(Val->getType()));
+ IntegerType *IntValTy =
+ Builder.getIntNTy(DL.getTypeSizeInBits(Val->getType()));
Val = Builder.CreateBitCast(Val, IntValTy);
CallInst *CI = Builder.CreateCall(
@@ -25587,7 +25677,7 @@ bool AArch64TargetLowering::isMaskAndCmp0FoldingBeneficial(
// may be beneficial to sink in other cases, but we would have to check that
// the cmp would not get folded into the br to form a cbz for these to be
// beneficial.
- ConstantInt* Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
+ ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
if (!Mask)
return false;
return Mask->getValue().isPowerOf2();
@@ -25648,9 +25738,9 @@ void AArch64TargetLowering::insertCopiesSplitCSR(
// fine for CXX_FAST_TLS since the C++-style TLS access functions should be
// nounwind. If we want to generalize this later, we may need to emit
// CFI pseudo-instructions.
- assert(Entry->getParent()->getFunction().hasFnAttribute(
- Attribute::NoUnwind) &&
- "Function should be nounwind in insertCopiesSplitCSR!");
+ assert(
+ Entry->getParent()->getFunction().hasFnAttribute(Attribute::NoUnwind) &&
+ "Function should be nounwind in insertCopiesSplitCSR!");
Entry->addLiveIn(*I);
BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
.addReg(*I);
@@ -25754,9 +25844,7 @@ void AArch64TargetLowering::finalizeLowering(MachineFunction &MF) const {
}
// Unlike X86, we let frame lowering assign offsets to all catch objects.
-bool AArch64TargetLowering::needsFixedCatchObjects() const {
- return false;
-}
+bool AArch64TargetLowering::needsFixedCatchObjects() const { return false; }
bool AArch64TargetLowering::shouldLocalize(
const MachineInstr &MI, const TargetTransformInfo *TTI) const {
@@ -26340,9 +26428,9 @@ SDValue AArch64TargetLowering::LowerToPredicatedOp(SDValue Op,
SmallVector<SDValue, 4> Operands = {Pg};
for (const SDValue &V : Op->op_values()) {
- assert((!V.getValueType().isVector() ||
- V.getValueType().isScalableVector()) &&
- "Only scalable vectors are supported!");
+ assert(
+ (!V.getValueType().isVector() || V.getValueType().isScalableVector()) &&
+ "Only scalable vectors are supported!");
Operands.push_back(V);
}
@@ -26384,8 +26472,9 @@ SDValue AArch64TargetLowering::LowerToScalableOp(SDValue Op,
return convertFromScalableVector(DAG, VT, ScalableRes);
}
-SDValue AArch64TargetLowering::LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp,
- SelectionDAG &DAG) const {
+SDValue
+AArch64TargetLowering::LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp,
+ SelectionDAG &DAG) const {
SDLoc DL(ScalarOp);
SDValue AccOp = ScalarOp.getOperand(0);
SDValue VecOp = ScalarOp.getOperand(1);
@@ -26406,14 +26495,15 @@ SDValue AArch64TargetLowering::LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp,
DAG.getUNDEF(ContainerVT), AccOp, Zero);
// Perform reduction.
- SDValue Rdx = DAG.getNode(AArch64ISD::FADDA_PRED, DL, ContainerVT,
- Pg, AccOp, VecOp);
+ SDValue Rdx =
+ DAG.getNode(AArch64ISD::FADDA_PRED, DL, ContainerVT, Pg, AccOp, VecOp);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Rdx, Zero);
}
-SDValue AArch64TargetLowering::LowerPredReductionToSVE(SDValue ReduceOp,
- SelectionDAG &DAG) const {
+SDValue
+AArch64TargetLowering::LowerPredReductionToSVE(SDValue ReduceOp,
+ SelectionDAG &DAG) const {
SDLoc DL(ReduceOp);
SDValue Op = ReduceOp.getOperand(0);
EVT OpVT = Op.getValueType();
@@ -26470,16 +26560,16 @@ SDValue AArch64TargetLowering::LowerReductionToSVE(unsigned Opcode,
}
// UADDV always returns an i64 result.
- EVT ResVT = (Opcode == AArch64ISD::UADDV_PRED) ? MVT::i64 :
- SrcVT.getVectorElementType();
+ EVT ResVT = (Opcode == AArch64ISD::UADDV_PRED) ? MVT::i64
+ : SrcVT.getVectorElementType();
EVT RdxVT = SrcVT;
if (SrcVT.isFixedLengthVector() || Opcode == AArch64ISD::UADDV_PRED)
RdxVT = getPackedSVEVectorVT(ResVT);
SDValue Pg = getPredicateForVector(DAG, DL, SrcVT);
SDValue Rdx = DAG.getNode(Opcode, DL, RdxVT, Pg, VecOp);
- SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT,
- Rdx, DAG.getConstant(0, DL, MVT::i64));
+ SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Rdx,
+ DAG.getConstant(0, DL, MVT::i64));
// The VEC_REDUCE nodes expect an element size result.
if (ResVT != ScalarOp.getValueType())
@@ -26488,9 +26578,8 @@ SDValue AArch64TargetLowering::LowerReductionToSVE(unsigned Opcode,
return Res;
}
-SDValue
-AArch64TargetLowering::LowerFixedLengthVectorSelectToSVE(SDValue Op,
- SelectionDAG &DAG) const {
+SDValue AArch64TargetLowering::LowerFixedLengthVectorSelectToSVE(
+ SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
SDLoc DL(Op);
@@ -26507,8 +26596,7 @@ AArch64TargetLowering::LowerFixedLengthVectorSelectToSVE(SDValue Op,
Mask = DAG.getNode(ISD::TRUNCATE, DL,
MaskContainerVT.changeVectorElementType(MVT::i1), Mask);
- auto ScalableRes = DAG.getNode(ISD::VSELECT, DL, ContainerVT,
- Mask, Op1, Op2);
+ auto ScalableRes = DAG.getNode(ISD::VSELECT, DL, ContainerVT, Mask, Op1, Op2);
return convertFromScalableVector(DAG, VT, ScalableRes);
}
@@ -26597,16 +26685,16 @@ AArch64TargetLowering::LowerFixedLengthFPExtendToSVE(SDValue Op,
SDValue Pg = getPredicateForVector(DAG, DL, VT);
EVT SrcVT = Val.getValueType();
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
- EVT ExtendVT = ContainerVT.changeVectorElementType(
- SrcVT.getVectorElementType());
+ EVT ExtendVT =
+ ContainerVT.changeVectorElementType(SrcVT.getVectorElementType());
Val = DAG.getNode(ISD::BITCAST, DL, SrcVT.changeTypeToInteger(), Val);
Val = DAG.getNode(ISD::ANY_EXTEND, DL, VT.changeTypeToInteger(), Val);
Val = convertToScalableVector(DAG, ContainerVT.changeTypeToInteger(), Val);
Val = getSVESafeBitCast(ExtendVT, Val, DAG);
- Val = DAG.getNode(AArch64ISD::FP_EXTEND_MERGE_PASSTHRU, DL, ContainerVT,
- Pg, Val, DAG.getUNDEF(ContainerVT));
+ Val = DAG.getNode(AArch64ISD::FP_EXTEND_MERGE_PASSTHRU, DL, ContainerVT, Pg,
+ Val, DAG.getUNDEF(ContainerVT));
return convertFromScalableVector(DAG, VT, Val);
}
@@ -26621,8 +26709,8 @@ AArch64TargetLowering::LowerFixedLengthFPRoundToSVE(SDValue Op,
SDValue Val = Op.getOperand(0);
EVT SrcVT = Val.getValueType();
EVT ContainerSrcVT = getContainerForFixedLengthVector(DAG, SrcVT);
- EVT RoundVT = ContainerSrcVT.changeVectorElementType(
- VT.getVectorElementType());
+ EVT RoundVT =
+ ContainerSrcVT.changeVectorElementType(VT.getVectorElementType());
SDValue Pg = getPredicateForVector(DAG, DL, RoundVT);
Val = convertToScalableVector(DAG, ContainerSrcVT, Val);
@@ -26725,7 +26813,7 @@ AArch64TargetLowering::LowerFixedLengthFPToIntToSVE(SDValue Op,
if (VT.bitsGT(SrcVT)) {
EVT CvtVT = ContainerDstVT.changeVectorElementType(
- ContainerSrcVT.getVectorElementType());
+ ContainerSrcVT.getVectorElementType());
SDValue Pg = getPredicateForFixedLengthVector(DAG, DL, VT);
Val = DAG.getNode(ISD::BITCAST, DL, SrcVT.changeTypeToInteger(), Val);
@@ -26985,8 +27073,8 @@ SDValue AArch64TargetLowering::getSVESafeBitCast(EVT VT, SDValue Op,
SDLoc DL(Op);
EVT InVT = Op.getValueType();
- assert(VT.isScalableVector() && isTypeLegal(VT) &&
- InVT.isScalableVector() && isTypeLegal(InVT) &&
+ assert(VT.isScalableVector() && isTypeLegal(VT) && InVT.isScalableVector() &&
+ isTypeLegal(InVT) &&
"Only expect to cast between legal scalable vector types!");
assert(VT.getVectorElementType() != MVT::i1 &&
InVT.getVectorElementType() != MVT::i1 &&
@@ -27199,7 +27287,6 @@ Value *AArch64TargetLowering::createComplexDeinterleavingIR(
Intrinsic::aarch64_neon_vcmla_rot180,
Intrinsic::aarch64_neon_vcmla_rot270};
-
return B.CreateIntrinsic(IdMap[(int)Rotation], Ty,
{Accumulator, InputA, InputB});
}
>From ec9659a158a5cc8c7299ef15f97b4564e518fb73 Mon Sep 17 00:00:00 2001
From: Marian Lukac <Marian.Lukac at arm.com>
Date: Wed, 28 Feb 2024 11:57:53 +0000
Subject: [PATCH 4/4] Removed seeing through bitcasts
---
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 4 ----
1 file changed, 4 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 90f68a77867db5..9ecd61ba7132bb 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -269,10 +269,6 @@ static bool isMergePassthruOpcode(unsigned Opc) {
// Returns true if inactive lanes are known to be zeroed by construction.
static bool isZeroingInactiveLanes(SDValue Op) {
- // Skip bitcasts nodes
- while (Op->getOpcode() == ISD::BITCAST)
- Op = Op->getOperand(0);
-
switch (Op.getOpcode()) {
default:
return false;
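
To make the effect of this last hunk concrete: with the bitcast-skipping loop gone, a node wrapped in an ISD::BITCAST is no longer unwrapped before the switch, so it falls into the `default: return false` path and is conservatively treated as not zeroing its inactive lanes. The short standalone C++ sketch below models only that decision; ToyOpcode, ToyNode and toyIsZeroingInactiveLanes are hypothetical stand-ins for illustration, not real LLVM types or APIs.

#include <cassert>

// Toy stand-ins, not real LLVM types or opcodes.
enum class ToyOpcode { Bitcast, KnownZeroingProducer, Other };

struct ToyNode {
  ToyOpcode Opcode;
  const ToyNode *Operand = nullptr; // a single operand is enough for the model
};

// Models the shape of the patched query: the node is inspected directly,
// with no loop peeling off Bitcast wrappers first.
static bool toyIsZeroingInactiveLanes(const ToyNode &Op) {
  switch (Op.Opcode) {
  default:
    return false; // conservative answer for anything unrecognised
  case ToyOpcode::KnownZeroingProducer:
    return true;
  }
}

int main() {
  ToyNode Producer{ToyOpcode::KnownZeroingProducer};
  ToyNode Wrapped{ToyOpcode::Bitcast, &Producer};

  // Asking about the producer itself still gives the optimistic answer.
  assert(toyIsZeroingInactiveLanes(Producer));
  // Once the producer is hidden behind a bitcast, the query hits the
  // default case and conservatively reports false.
  assert(!toyIsZeroingInactiveLanes(Wrapped));
  return 0;
}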