[llvm] 9b5f197 - [AArch64][SME2/SVE2p1] Add predicate-as-counter intrinsics for pext (multi)
Sander de Smalen via llvm-commits
llvm-commits at lists.llvm.org
Tue May 16 07:04:57 PDT 2023
Author: Sander de Smalen
Date: 2023-05-16T14:04:16Z
New Revision: 9b5f19714cf7f1ba10609faf3852bc6c4558fab3
URL: https://github.com/llvm/llvm-project/commit/9b5f19714cf7f1ba10609faf3852bc6c4558fab3
DIFF: https://github.com/llvm/llvm-project/commit/9b5f19714cf7f1ba10609faf3852bc6c4558fab3.diff
LOG: [AArch64][SME2/SVE2p1] Add predicate-as-counter intrinsics for pext (multi)
These intrinsics are used to implement the pext intrinsics that extract
two predicates (mask) from a predicate-as-counter value, e.g.
__attribute__((arm_streaming))
svboolx2_t svpext_lane_c8_x2(svcount_t pnn, uint64_t imm);
As described in https://github.com/ARM-software/acle/pull/217
Reviewed By: kmclaughlin
Differential Revision: https://reviews.llvm.org/D150442
Added:
Modified:
llvm/include/llvm/IR/IntrinsicsAArch64.td
llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
llvm/test/CodeGen/AArch64/sve2p1-intrinsics-predicate-as-counter.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 428ef8924ab6..d7f650e1a12a 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -2773,6 +2773,10 @@ let TargetPrefix = "aarch64" in {
[llvm_aarch64_svcount_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
+ def int_aarch64_sve_pext_x2
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
+ [llvm_aarch64_svcount_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<1>>]>;
def int_aarch64_sve_ptrue_c8
: DefaultAttrsIntrinsic<[llvm_aarch64_svcount_ty], [], [IntrNoMem]>;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index b9714e52f20b..5f54b878f5c1 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -372,6 +372,7 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
bool IsIntr = false);
void SelectDestructiveMultiIntrinsic(SDNode *N, unsigned NumVecs,
bool IsZmMulti, unsigned Opcode);
+ void SelectPExtPair(SDNode *N, unsigned Opc);
void SelectWhilePair(SDNode *N, unsigned Opc);
void SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, unsigned Opcode);
void SelectClamp(SDNode *N, unsigned NumVecs, unsigned Opcode);
@@ -1652,6 +1653,28 @@ static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef<unsigned> Opcodes) {
return (Opcodes.size() <= Offset) ? 0 : Opcodes[Offset];
}
+// Selects a two-result pext node using opcode Opc. Almost identical to
+// SelectWhilePair, but returns without selecting if the immediate is > 1.
+// TODO: Merge these two functions together at some point?
+void AArch64DAGToDAGISel::SelectPExtPair(SDNode *N, unsigned Opc) {
+ // Immediate must be 0 or 1; for any larger constant, leave N unselected.
+ if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(N->getOperand(2)))
+ if (Imm->getZExtValue() > 1)
+ return;
+
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+ SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
+ SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
+ SDValue SuperReg = SDValue(WhilePair, 0);
+
+ for (unsigned I = 0; I < 2; ++I)
+ ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
+ AArch64::psub0 + I, DL, VT, SuperReg));
+
+ CurDAG->RemoveDeadNode(N);
+}
+
void AArch64DAGToDAGISel::SelectWhilePair(SDNode *N, unsigned Opc) {
SDLoc DL(N);
EVT VT = N->getValueType(0);
@@ -5359,6 +5382,14 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
AArch64::UUNPK_VG4_4Z2Z_D}))
SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
return;
+ case Intrinsic::aarch64_sve_pext_x2: {
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
+ Node->getValueType(0),
+ {AArch64::PEXT_2PCI_B, AArch64::PEXT_2PCI_H, AArch64::PEXT_2PCI_S,
+ AArch64::PEXT_2PCI_D}))
+ SelectPExtPair(Node, Op);
+ return;
+ }
}
break;
}
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-predicate-as-counter.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-predicate-as-counter.ll
index b36d8a6ff8ab..33d5f27b08fd 100644
--- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-predicate-as-counter.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-predicate-as-counter.ll
@@ -71,6 +71,75 @@ declare <vscale x 8 x i1> @llvm.aarch64.sve.pext.nxv8i1(target("aarch64.svcount"
declare <vscale x 4 x i1> @llvm.aarch64.sve.pext.nxv4i1(target("aarch64.svcount"), i32)
declare <vscale x 2 x i1> @llvm.aarch64.sve.pext.nxv2i1(target("aarch64.svcount"), i32)
+define {<vscale x 16 x i1>,<vscale x 16 x i1>} @pext_x2_b(target("aarch64.svcount") %x) nounwind {
+; CHECK-LABEL: pext_x2_b:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: mov p8.b, p0.b
+; CHECK-NEXT: pext { p0.b, p1.b }, pn8[1]
+; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %res = call {<vscale x 16 x i1>,<vscale x 16 x i1>} @llvm.aarch64.sve.pext.x2.nxv16i1(target("aarch64.svcount") %x, i32 1)
+ ret {<vscale x 16 x i1>,<vscale x 16 x i1>} %res
+}
+
+define {<vscale x 8 x i1>,<vscale x 8 x i1>} @pext_x2_h(target("aarch64.svcount") %x) nounwind {
+; CHECK-LABEL: pext_x2_h:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: mov p8.b, p0.b
+; CHECK-NEXT: pext { p0.h, p1.h }, pn8[1]
+; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %res = call {<vscale x 8 x i1>,<vscale x 8 x i1>} @llvm.aarch64.sve.pext.x2.nxv8i1(target("aarch64.svcount") %x, i32 1)
+ ret {<vscale x 8 x i1>,<vscale x 8 x i1>} %res
+}
+
+define {<vscale x 4 x i1>,<vscale x 4 x i1>} @pext_x2_s(target("aarch64.svcount") %x) nounwind {
+; CHECK-LABEL: pext_x2_s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: mov p8.b, p0.b
+; CHECK-NEXT: pext { p0.s, p1.s }, pn8[1]
+; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %res = call {<vscale x 4 x i1>,<vscale x 4 x i1>} @llvm.aarch64.sve.pext.x2.nxv4i1(target("aarch64.svcount") %x, i32 1)
+ ret {<vscale x 4 x i1>,<vscale x 4 x i1>} %res
+}
+
+define {<vscale x 2 x i1>,<vscale x 2 x i1>} @pext_x2_d(target("aarch64.svcount") %x) nounwind {
+; CHECK-LABEL: pext_x2_d:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: mov p8.b, p0.b
+; CHECK-NEXT: pext { p0.d, p1.d }, pn8[1]
+; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %res = call {<vscale x 2 x i1>,<vscale x 2 x i1>} @llvm.aarch64.sve.pext.x2.nxv2i1(target("aarch64.svcount") %x, i32 1)
+ ret {<vscale x 2 x i1>,<vscale x 2 x i1>} %res
+}
+
+declare {<vscale x 16 x i1>,<vscale x 16 x i1>} @llvm.aarch64.sve.pext.x2.nxv16i1(target("aarch64.svcount"), i32)
+declare {<vscale x 8 x i1>,<vscale x 8 x i1>} @llvm.aarch64.sve.pext.x2.nxv8i1(target("aarch64.svcount"), i32)
+declare {<vscale x 4 x i1>,<vscale x 4 x i1>} @llvm.aarch64.sve.pext.x2.nxv4i1(target("aarch64.svcount"), i32)
+declare {<vscale x 2 x i1>,<vscale x 2 x i1>} @llvm.aarch64.sve.pext.x2.nxv2i1(target("aarch64.svcount"), i32)
+
define target("aarch64.svcount") @ptrue_b() nounwind {
; CHECK-LABEL: ptrue_b:
; CHECK: // %bb.0:
More information about the llvm-commits mailing list