[llvm] fb75451 - [SVE] Custom ISel for fixed length extract/insert_subvector.
Paul Walker via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 8 02:51:27 PDT 2020
Author: Paul Walker
Date: 2020-07-08T09:49:28Z
New Revision: fb75451775f83c04d53e4e94bb4bd298ea9a882f
URL: https://github.com/llvm/llvm-project/commit/fb75451775f83c04d53e4e94bb4bd298ea9a882f
DIFF: https://github.com/llvm/llvm-project/commit/fb75451775f83c04d53e4e94bb4bd298ea9a882f.diff
LOG: [SVE] Custom ISel for fixed length extract/insert_subvector.
We use extract_subvector and insert_subvector to "cast" between
fixed length and scalable vectors. This patch adds custom C++
based ISel for the following cases:
fixed_vector = ISD::EXTRACT_SUBVECTOR scalable_vector, 0
scalable_vector = ISD::INSERT_SUBVECTOR undef(scalable_vector), fixed_vector, 0
These result in either EXTRACT_SUBREG/INSERT_SUBREG for NEON-sized
vectors or COPY_TO_REGCLASS otherwise.
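As a rough illustration (not part of this commit), IR like the sketch below,
compiled with e.g. -aarch64-sve-vector-bits-min=256, exercises both casts:
the fixed length load is lowered to a predicated SVE load whose scalable
result is cast to the fixed length type via EXTRACT_SUBVECTOR, and the store
casts the value into an undef scalable vector via INSERT_SUBVECTOR before a
scalable store. The function name is hypothetical; within a single block the
cast pair may simply fold away, which is why the added test splits the load
and store across blocks.

  define void @copy_v8i32(<8 x i32>* %in, <8 x i32>* %out) #0 {
    %v = load <8 x i32>, <8 x i32>* %in    ; ld1w + EXTRACT_SUBVECTOR "cast"
    store <8 x i32> %v, <8 x i32>* %out    ; INSERT_SUBVECTOR "cast" + st1w
    ret void
  }
  attributes #0 = { "target-features"="+sve" }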
Differential Revision: https://reviews.llvm.org/D82871
Added:
llvm/test/CodeGen/AArch64/sve-fixed-length-subvector.ll
Modified:
llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.h
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 4ef9bfb3aab6..10c477853353 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -3240,6 +3240,63 @@ void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
ReplaceNode(N, N3);
}
+// NOTE: We cannot use EXTRACT_SUBREG in all cases because the fixed length
+// vector types larger than NEON don't have a matching SubRegIndex.
+static SDNode *extractSubReg(SelectionDAG *DAG, EVT VT, SDValue V) {
+ assert(V.getValueType().isScalableVector() &&
+ V.getValueType().getSizeInBits().getKnownMinSize() ==
+ AArch64::SVEBitsPerBlock &&
+ "Expected to extract from a packed scalable vector!");
+ assert(VT.isFixedLengthVector() &&
+ "Expected to extract a fixed length vector!");
+
+ SDLoc DL(V);
+ switch (VT.getSizeInBits()) {
+ case 64: {
+ auto SubReg = DAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
+ return DAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT, V, SubReg);
+ }
+ case 128: {
+ auto SubReg = DAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
+ return DAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT, V, SubReg);
+ }
+ default: {
+ auto RC = DAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
+ return DAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
+ }
+ }
+}
+
+// NOTE: We cannot use INSERT_SUBREG in all cases because the fixed length
+// vector types larger than NEON don't have a matching SubRegIndex.
+static SDNode *insertSubReg(SelectionDAG *DAG, EVT VT, SDValue V) {
+ assert(VT.isScalableVector() &&
+ VT.getSizeInBits().getKnownMinSize() == AArch64::SVEBitsPerBlock &&
+ "Expected to insert into a packed scalable vector!");
+ assert(V.getValueType().isFixedLengthVector() &&
+ "Expected to insert a fixed length vector!");
+
+ SDLoc DL(V);
+ switch (V.getValueType().getSizeInBits()) {
+ case 64: {
+ auto SubReg = DAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
+ auto Container = DAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
+ return DAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL, VT,
+ SDValue(Container, 0), V, SubReg);
+ }
+ case 128: {
+ auto SubReg = DAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
+ auto Container = DAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
+ return DAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL, VT,
+ SDValue(Container, 0), V, SubReg);
+ }
+ default: {
+ auto RC = DAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
+ return DAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
+ }
+ }
+}
+
void AArch64DAGToDAGISel::Select(SDNode *Node) {
// If we have a custom node, we already have selected!
if (Node->isMachineOpcode()) {
@@ -3313,6 +3370,52 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
return;
break;
+ case ISD::EXTRACT_SUBVECTOR: {
+ // Bail when not a "cast" like extract_subvector.
+ if (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue() != 0)
+ break;
+
+ // Bail when normal isel can do the job.
+ EVT InVT = Node->getOperand(0).getValueType();
+ if (VT.isScalableVector() || InVT.isFixedLengthVector())
+ break;
+
+ // NOTE: We can only get here when doing fixed length SVE code generation.
+ // We do manual selection because the types involved are not linked to real
+ // registers (despite being legal) and must be coerced into SVE registers.
+ //
+ // NOTE: If the above changes, be aware that selection will still not work
+ // because the td definition of extract_vector does not support extracting
+ // a fixed length vector from a scalable vector.
+
+ ReplaceNode(Node, extractSubReg(CurDAG, VT, Node->getOperand(0)));
+ return;
+ }
+
+ case ISD::INSERT_SUBVECTOR: {
+ // Bail when not a "cast" like insert_subvector.
+ if (cast<ConstantSDNode>(Node->getOperand(2))->getZExtValue() != 0)
+ break;
+ if (!Node->getOperand(0).isUndef())
+ break;
+
+ // Bail when normal isel should do the job.
+ EVT InVT = Node->getOperand(1).getValueType();
+ if (VT.isFixedLengthVector() || InVT.isScalableVector())
+ break;
+
+ // NOTE: We can only get here when doing fixed length SVE code generation.
+ // We do manual selection because the types involved are not linked to real
+ // registers (despite being legal) and must be coerced into SVE registers.
+ //
+ // NOTE: If the above changes, be aware that selection will still not work
+ // because the td definition of insert_vector does not support inserting a
+ // fixed length vector into a scalable vector.
+
+ ReplaceNode(Node, insertSubReg(CurDAG, VT, Node->getOperand(1)));
+ return;
+ }
+
case ISD::Constant: {
// Materialize zero constants as copies from WZR/XZR. This allows
// the coalescer to propagate these into other instructions.
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index aaeb6b459915..729fb8f62912 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -116,6 +116,18 @@ EnableOptimizeLogicalImm("aarch64-enable-logical-imm", cl::Hidden,
/// Value type used for condition codes.
static const MVT MVT_CC = MVT::i32;
+/// Returns true if VT's elements occupy the lowest bit positions of its
+/// associated register class without any intervening space.
+///
+/// For example, nxv2f16, nxv4f16 and nxv8f16 are legal types that belong to the
+/// same register class, but only nxv8f16 can be treated as a packed vector.
+static inline bool isPackedVectorType(EVT VT, SelectionDAG &DAG) {
+ assert(VT.isVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
+ "Expected legal vector type!");
+ return VT.isFixedLengthVector() ||
+ VT.getSizeInBits().getKnownMinSize() == AArch64::SVEBitsPerBlock;
+}
+
AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
const AArch64Subtarget &STI)
: TargetLowering(TM), Subtarget(&STI) {
@@ -908,6 +920,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
// D68877 for more details.
for (MVT VT : MVT::integer_scalable_vector_valuetypes()) {
if (isTypeLegal(VT)) {
+ setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SDIV, VT, Custom);
@@ -921,16 +934,18 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SRA, VT, Custom);
if (VT.getScalarType() == MVT::i1)
setOperationAction(ISD::SETCC, VT, Custom);
- } else {
- for (auto VT : { MVT::nxv8i8, MVT::nxv4i16, MVT::nxv2i32 })
- setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
}
}
+
+ for (auto VT : {MVT::nxv8i8, MVT::nxv4i16, MVT::nxv2i32})
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
+
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
for (MVT VT : MVT::fp_scalable_vector_valuetypes()) {
if (isTypeLegal(VT)) {
+ setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
}
@@ -1037,9 +1052,7 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
setOperationAction(Op, VT, Expand);
- // EXTRACT_SUBVECTOR/INSERT_SUBVECTOR are used to "cast" between scalable
- // and fixed length vector types, although with the current level of support
- // only the former is exercised.
+ // We use EXTRACT_SUBVECTOR to "cast" a scalable vector to a fixed length one.
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
// Lower fixed length vector operations to scalable equivalents.
@@ -3469,6 +3482,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
return LowerSPLAT_VECTOR(Op, DAG);
case ISD::EXTRACT_SUBVECTOR:
return LowerEXTRACT_SUBVECTOR(Op, DAG);
+ case ISD::INSERT_SUBVECTOR:
+ return LowerINSERT_SUBVECTOR(Op, DAG);
case ISD::SDIV:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::SDIV_PRED);
case ISD::UDIV:
@@ -8679,29 +8694,47 @@ AArch64TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
SDValue AArch64TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
SelectionDAG &DAG) const {
- assert(!Op.getValueType().isScalableVector() &&
- "Unexpected scalable type for custom lowering EXTRACT_SUBVECTOR");
+ assert(Op.getValueType().isFixedLengthVector() &&
+ "Only cases that extract a fixed length vector are supported!");
- EVT VT = Op.getOperand(0).getValueType();
- SDLoc dl(Op);
- // Just in case...
- if (!VT.isVector())
- return SDValue();
+ EVT InVT = Op.getOperand(0).getValueType();
+ unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ unsigned Size = Op.getValueSizeInBits();
- ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op.getOperand(1));
- if (!Cst)
- return SDValue();
- unsigned Val = Cst->getZExtValue();
+ if (InVT.isScalableVector()) {
+ // This will be matched by custom code during ISelDAGToDAG.
+ if (Idx == 0 && isPackedVectorType(InVT, DAG))
+ return Op;
- unsigned Size = Op.getValueSizeInBits();
+ return SDValue();
+ }
// This will get lowered to an appropriate EXTRACT_SUBREG in ISel.
- if (Val == 0)
+ if (Idx == 0 && InVT.getSizeInBits() <= 128)
return Op;
// If this is extracting the upper 64-bits of a 128-bit vector, we match
// that directly.
- if (Size == 64 && Val * VT.getScalarSizeInBits() == 64)
+ if (Size == 64 && Idx * InVT.getScalarSizeInBits() == 64)
+ return Op;
+
+ return SDValue();
+}
+
+SDValue AArch64TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert(Op.getValueType().isScalableVector() &&
+ "Only expect to lower inserts into scalable vectors!");
+
+ EVT InVT = Op.getOperand(1).getValueType();
+ unsigned Idx = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
+
+ // We don't have any patterns for scalable vectors yet.
+ if (InVT.isScalableVector() || !useSVEForFixedLengthVectorVT(InVT))
+ return SDValue();
+
+ // This will be matched by custom code during ISelDAGToDAG.
+ if (Idx == 0 && isPackedVectorType(InVT, DAG) && Op.getOperand(0).isUndef())
return Op;
return SDValue();
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 210b8c842701..60ce88576f91 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -850,6 +850,7 @@ class AArch64TargetLowering : public TargetLowering {
SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG,
unsigned NewOp) const;
SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-subvector.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-subvector.ll
new file mode 100644
index 000000000000..45ebdc78784e
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-subvector.ll
@@ -0,0 +1,88 @@
+; RUN: llc -aarch64-sve-vector-bits-min=128 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefix=NO_SVE
+; RUN: llc -aarch64-sve-vector-bits-min=256 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK
+; RUN: llc -aarch64-sve-vector-bits-min=384 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK
+; RUN: llc -aarch64-sve-vector-bits-min=512 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
+; RUN: llc -aarch64-sve-vector-bits-min=640 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
+; RUN: llc -aarch64-sve-vector-bits-min=768 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
+; RUN: llc -aarch64-sve-vector-bits-min=896 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
+; RUN: llc -aarch64-sve-vector-bits-min=1024 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc -aarch64-sve-vector-bits-min=1152 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc -aarch64-sve-vector-bits-min=1280 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc -aarch64-sve-vector-bits-min=1408 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc -aarch64-sve-vector-bits-min=1536 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc -aarch64-sve-vector-bits-min=1664 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc -aarch64-sve-vector-bits-min=1792 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc -aarch64-sve-vector-bits-min=1920 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc -aarch64-sve-vector-bits-min=2048 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024,VBITS_GE_2048
+
+; Test that we can generate code for patterns of the form:
+; fixed_length_vector = ISD::EXTRACT_SUBVECTOR scalable_vector, 0
+; scalable_vector = ISD::INSERT_SUBVECTOR scalable_vector, fixed_length_vector, 0
+;
+; NOTE: Currently shufflevector does not support scalable vectors so it cannot
+; be used to model the above operations. Instead these tests rely on knowing
+; how fixed length operations are lowered to scalable ones, with multiple blocks
+; ensuring insert/extract sequences are not folded away.
+
+target triple = "aarch64-unknown-linux-gnu"
+
+; Don't use SVE when its registers are no bigger than NEON.
+; NO_SVE-NOT: ptrue
+
+define void @subvector_v8i32(<8 x i32> *%in, <8 x i32>* %out) #0 {
+; CHECK-LABEL: subvector_v8i32:
+; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
+; CHECK: ld1w { [[DATA:z[0-9]+.s]] }, [[PG]]/z, [x0]
+; CHECK: st1w { [[DATA]] }, [[PG]], [x1]
+; CHECK: ret
+ %a = load <8 x i32>, <8 x i32>* %in
+ br label %bb1
+
+bb1:
+ store <8 x i32> %a, <8 x i32>* %out
+ ret void
+}
+
+define void @subvector_v16i32(<16 x i32> *%in, <16 x i32>* %out) #0 {
+; CHECK-LABEL: subvector_v16i32:
+; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16
+; VBITS_GE_512: ld1w { [[DATA:z[0-9]+.s]] }, [[PG]]/z, [x0]
+; VBITS_GE_512: st1w { [[DATA]] }, [[PG]], [x1]
+; CHECK: ret
+ %a = load <16 x i32>, <16 x i32>* %in
+ br label %bb1
+
+bb1:
+ store <16 x i32> %a, <16 x i32>* %out
+ ret void
+}
+
+define void @subvector_v32i32(<32 x i32> *%in, <32 x i32>* %out) #0 {
+; CHECK-LABEL: subvector_v32i32:
+; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32
+; VBITS_GE_1024: ld1w { [[DATA:z[0-9]+.s]] }, [[PG]]/z, [x0]
+; VBITS_GE_1024: st1w { [[DATA]] }, [[PG]], [x1]
+; CHECK: ret
+ %a = load <32 x i32>, <32 x i32>* %in
+ br label %bb1
+
+bb1:
+ store <32 x i32> %a, <32 x i32>* %out
+ ret void
+}
+
+define void @subvector_v64i32(<64 x i32> *%in, <64 x i32>* %out) #0 {
+; CHECK-LABEL: subvector_v64i32:
+; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64
+; VBITS_GE_2048: ld1w { [[DATA:z[0-9]+.s]] }, [[PG]]/z, [x0]
+; VBITS_GE_2048: st1w { [[DATA]] }, [[PG]], [x1]
+; CHECK: ret
+ %a = load <64 x i32>, <64 x i32>* %in
+ br label %bb1
+
+bb1:
+ store <64 x i32> %a, <64 x i32>* %out
+ ret void
+}
+
+attributes #0 = { "target-features"="+sve" }