[llvm] 3b95b81 - [AArch64][SME2/SVE2p1] Add predicate-as-counter intrinsics for ptrue/cntp
Sander de Smalen via llvm-commits
llvm-commits at lists.llvm.org
Fri May 12 01:44:14 PDT 2023
Author: Sander de Smalen
Date: 2023-05-12T08:43:12Z
New Revision: 3b95b818135f85376082621149e59c297f640c14
URL: https://github.com/llvm/llvm-project/commit/3b95b818135f85376082621149e59c297f640c14
DIFF: https://github.com/llvm/llvm-project/commit/3b95b818135f85376082621149e59c297f640c14.diff
LOG: [AArch64][SME2/SVE2p1] Add predicate-as-counter intrinsics for ptrue/cntp
These intrinsics are used to implement:
* svptrue_c8(), svptrue_c16(), etc.
* svcntp_c8(svcount_t pnn, uint64_t vl), svcntp_c16(...), etc.
These functions are described in https://github.com/ARM-software/acle/pull/217
Reviewed By: david-arm
Differential Revision: https://reviews.llvm.org/D150263
Added:
llvm/test/CodeGen/AArch64/sve2p1-intrinsics-cntp.ll
llvm/test/CodeGen/AArch64/sve2p1-intrinsics-predicate-as-counter.ll
Modified:
llvm/include/llvm/IR/Intrinsics.h
llvm/include/llvm/IR/Intrinsics.td
llvm/include/llvm/IR/IntrinsicsAArch64.td
llvm/lib/IR/Function.cpp
llvm/lib/Target/AArch64/SVEInstrFormats.td
Removed:
################################################################################
diff --git a/llvm/include/llvm/IR/Intrinsics.h b/llvm/include/llvm/IR/Intrinsics.h
index c677983b71867..6ac02567c43b5 100644
--- a/llvm/include/llvm/IR/Intrinsics.h
+++ b/llvm/include/llvm/IR/Intrinsics.h
@@ -138,6 +138,7 @@ namespace Intrinsic {
AMX,
PPCQuad,
AnyPtrToElt,
+ AArch64Svcount,
} Kind;
union {
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index b8666b52ecec4..9b49df277dd92 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -316,6 +316,7 @@ def IIT_FUNCREF : IIT_VT<funcref, 55>;
def IIT_ANYPTR_TO_ELT : IIT_Base<56>;
def IIT_I2 : IIT_Int<2, 57>;
def IIT_I4 : IIT_Int<4, 58>;
+def IIT_AARCH64_SVCOUNT : IIT_VT<aarch64svcount, 59>;
}
defvar IIT_all_FixedTypes = !filter(iit, IIT_all,
@@ -511,6 +512,8 @@ def llvm_token_ty : LLVMType<token>; // token
def llvm_x86mmx_ty : LLVMType<x86mmx>;
def llvm_ptrx86mmx_ty : LLVMPointerType<llvm_x86mmx_ty>; // <1 x i64>*
+def llvm_aarch64_svcount_ty : LLVMType<aarch64svcount>;
+
def llvm_x86amx_ty : LLVMType<x86amx>;
def llvm_v2i1_ty : LLVMType<v2i1>; // 2 x i1
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index eedc4923b3826..599917ce2e04b 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -2764,6 +2764,33 @@ let TargetPrefix = "aarch64" in {
[llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
}
+ //
+ // Predicate-as-counter intrinsics
+ //
+
+
+ def int_aarch64_sve_ptrue_c8
+ : DefaultAttrsIntrinsic<[llvm_aarch64_svcount_ty], [], [IntrNoMem]>;
+ def int_aarch64_sve_ptrue_c16
+ : DefaultAttrsIntrinsic<[llvm_aarch64_svcount_ty], [], [IntrNoMem]>;
+ def int_aarch64_sve_ptrue_c32
+ : DefaultAttrsIntrinsic<[llvm_aarch64_svcount_ty], [], [IntrNoMem]>;
+ def int_aarch64_sve_ptrue_c64
+ : DefaultAttrsIntrinsic<[llvm_aarch64_svcount_ty], [], [IntrNoMem]>;
+
+ def int_aarch64_sve_cntp_c8
+ : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_aarch64_svcount_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+ def int_aarch64_sve_cntp_c16
+ : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_aarch64_svcount_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+ def int_aarch64_sve_cntp_c32
+ : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_aarch64_svcount_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+ def int_aarch64_sve_cntp_c64
+ : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_aarch64_svcount_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+
//
// SME2 Intrinsics
//
diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp
index 300e0227b66f8..fb29918e1fa36 100644
--- a/llvm/lib/IR/Function.cpp
+++ b/llvm/lib/IR/Function.cpp
@@ -1099,6 +1099,9 @@ static void DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,
case IIT_I4:
OutputTable.push_back(IITDescriptor::get(IITDescriptor::Integer, 4));
return;
+ case IIT_AARCH64_SVCOUNT:
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::AArch64Svcount, 0));
+ return;
case IIT_I8:
OutputTable.push_back(IITDescriptor::get(IITDescriptor::Integer, 8));
return;
@@ -1340,6 +1343,8 @@ static Type *DecodeFixedType(ArrayRef<Intrinsic::IITDescriptor> &Infos,
case IITDescriptor::Double: return Type::getDoubleTy(Context);
case IITDescriptor::Quad: return Type::getFP128Ty(Context);
case IITDescriptor::PPCQuad: return Type::getPPC_FP128Ty(Context);
+ case IITDescriptor::AArch64Svcount:
+ return TargetExtType::get(Context, "aarch64.svcount");
case IITDescriptor::Integer:
return IntegerType::get(Context, D.Integer_Width);
@@ -1514,6 +1519,9 @@ static bool matchIntrinsicType(
case IITDescriptor::Quad: return !Ty->isFP128Ty();
case IITDescriptor::PPCQuad: return !Ty->isPPC_FP128Ty();
case IITDescriptor::Integer: return !Ty->isIntegerTy(D.Integer_Width);
+ case IITDescriptor::AArch64Svcount:
+ return !isa<TargetExtType>(Ty) ||
+ cast<TargetExtType>(Ty)->getName() != "aarch64.svcount";
case IITDescriptor::Vector: {
VectorType *VT = dyn_cast<VectorType>(Ty);
return !VT || VT->getElementCount() != D.Vector_Width ||
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index d55c0351afcde..4fd6e56fbb308 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -506,6 +506,12 @@ class SVE_2_Op_Imm_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
: Pat<(vtd (op vt1:$Op1, (vt2 ImmTy:$Op2))),
(inst $Op1, ImmTy:$Op2)>;
+multiclass SVE2p1_Cntp_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
+ Instruction inst> {
+ def : Pat<(vtd (op vt1:$Op1, (i32 2))), (inst $Op1, 0)>;
+ def : Pat<(vtd (op vt1:$Op1, (i32 4))), (inst $Op1, 1)>;
+}
+
class SVE_3_Op_Imm_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
ValueType vt2, ValueType vt3, Operand ImmTy,
Instruction inst>
@@ -9158,9 +9164,9 @@ multiclass sve2p1_two_way_dot_vvi<string mnemonic, bit u, SDPatternOperator intr
def : SVE_4_Op_Imm_Pat<nxv4i32, intrinsic, nxv4i32, nxv8i16, nxv8i16, i32, VectorIndexS32b_timm, !cast<Instruction>(NAME)>;
}
-class sve2p1_ptrue_pn<string mnemonic, bits<2> sz, PNRP8to15RegOp pnrty>
+class sve2p1_ptrue_pn<string mnemonic, bits<2> sz, PNRP8to15RegOp pnrty, SDPatternOperator op>
: I<(outs pnrty:$PNd), (ins ), mnemonic, "\t$PNd",
- "", []>, Sched<[]> {
+ "", [(set pnrty:$PNd, (op))]>, Sched<[]> {
bits<3> PNd;
let Inst{31-24} = 0b00100101;
let Inst{23-22} = sz;
@@ -9172,10 +9178,10 @@ class sve2p1_ptrue_pn<string mnemonic, bits<2> sz, PNRP8to15RegOp pnrty>
multiclass sve2p1_ptrue_pn<string mnemonic> {
- def _B : sve2p1_ptrue_pn<mnemonic, 0b00, PNR8_p8to15>;
- def _H : sve2p1_ptrue_pn<mnemonic, 0b01, PNR16_p8to15>;
- def _S : sve2p1_ptrue_pn<mnemonic, 0b10, PNR32_p8to15>;
- def _D : sve2p1_ptrue_pn<mnemonic, 0b11, PNR64_p8to15>;
+ def _B : sve2p1_ptrue_pn<mnemonic, 0b00, PNR8_p8to15, int_aarch64_sve_ptrue_c8>;
+ def _H : sve2p1_ptrue_pn<mnemonic, 0b01, PNR16_p8to15, int_aarch64_sve_ptrue_c16>;
+ def _S : sve2p1_ptrue_pn<mnemonic, 0b10, PNR32_p8to15, int_aarch64_sve_ptrue_c32>;
+ def _D : sve2p1_ptrue_pn<mnemonic, 0b11, PNR64_p8to15, int_aarch64_sve_ptrue_c64>;
}
@@ -9543,6 +9549,11 @@ multiclass sve2p1_pcount_pn<string mnemonic, bits<3> opc> {
def _H : sve2p1_pcount_pn<mnemonic, opc, 0b01, PNR16>;
def _S : sve2p1_pcount_pn<mnemonic, opc, 0b10, PNR32>;
def _D : sve2p1_pcount_pn<mnemonic, opc, 0b11, PNR64>;
+
+ defm : SVE2p1_Cntp_Pat<i64, int_aarch64_sve_cntp_c8, aarch64svcount, !cast<Instruction>(NAME # _B)>;
+ defm : SVE2p1_Cntp_Pat<i64, int_aarch64_sve_cntp_c16, aarch64svcount, !cast<Instruction>(NAME # _H)>;
+ defm : SVE2p1_Cntp_Pat<i64, int_aarch64_sve_cntp_c32, aarch64svcount, !cast<Instruction>(NAME # _S)>;
+ defm : SVE2p1_Cntp_Pat<i64, int_aarch64_sve_cntp_c64, aarch64svcount, !cast<Instruction>(NAME # _D)>;
}
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-cntp.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-cntp.ll
new file mode 100644
index 0000000000000..5a56b86c3b422
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-cntp.ll
@@ -0,0 +1,81 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1 < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 < %s | FileCheck %s
+
+define i64 @test_svcntp_c8_vlx2(target("aarch64.svcount") %pn) nounwind {
+; CHECK-LABEL: test_svcntp_c8_vlx2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cntp x0, pn0.b, vlx2
+; CHECK-NEXT: ret
+ %res = call i64 @llvm.aarch64.sve.cntp.c8(target("aarch64.svcount") %pn, i32 2)
+ ret i64 %res
+}
+
+define i64 @test_svcntp_c8_vlx4(target("aarch64.svcount") %pn) nounwind {
+; CHECK-LABEL: test_svcntp_c8_vlx4:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cntp x0, pn0.b, vlx4
+; CHECK-NEXT: ret
+ %res = call i64 @llvm.aarch64.sve.cntp.c8(target("aarch64.svcount") %pn, i32 4)
+ ret i64 %res
+}
+
+define i64 @test_svcntp_c16_vlx2(target("aarch64.svcount") %pn) nounwind {
+; CHECK-LABEL: test_svcntp_c16_vlx2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cntp x0, pn0.h, vlx2
+; CHECK-NEXT: ret
+ %res = call i64 @llvm.aarch64.sve.cntp.c16(target("aarch64.svcount") %pn, i32 2)
+ ret i64 %res
+}
+
+define i64 @test_svcntp_c16_vlx4(target("aarch64.svcount") %pn) nounwind {
+; CHECK-LABEL: test_svcntp_c16_vlx4:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cntp x0, pn0.h, vlx4
+; CHECK-NEXT: ret
+ %res = call i64 @llvm.aarch64.sve.cntp.c16(target("aarch64.svcount") %pn, i32 4)
+ ret i64 %res
+}
+
+define i64 @test_svcntp_c32_vlx2(target("aarch64.svcount") %pn) nounwind {
+; CHECK-LABEL: test_svcntp_c32_vlx2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cntp x0, pn0.s, vlx2
+; CHECK-NEXT: ret
+ %res = call i64 @llvm.aarch64.sve.cntp.c32(target("aarch64.svcount") %pn, i32 2)
+ ret i64 %res
+}
+
+define i64 @test_svcntp_c32_vlx4(target("aarch64.svcount") %pn) nounwind {
+; CHECK-LABEL: test_svcntp_c32_vlx4:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cntp x0, pn0.s, vlx4
+; CHECK-NEXT: ret
+ %res = call i64 @llvm.aarch64.sve.cntp.c32(target("aarch64.svcount") %pn, i32 4)
+ ret i64 %res
+}
+
+define i64 @test_svcntp_c64_vlx2(target("aarch64.svcount") %pn) nounwind {
+; CHECK-LABEL: test_svcntp_c64_vlx2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cntp x0, pn0.d, vlx2
+; CHECK-NEXT: ret
+ %res = call i64 @llvm.aarch64.sve.cntp.c64(target("aarch64.svcount") %pn, i32 2)
+ ret i64 %res
+}
+
+define i64 @test_svcntp_c64_vlx4(target("aarch64.svcount") %pn) nounwind {
+; CHECK-LABEL: test_svcntp_c64_vlx4:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cntp x0, pn0.d, vlx4
+; CHECK-NEXT: ret
+ %res = call i64 @llvm.aarch64.sve.cntp.c64(target("aarch64.svcount") %pn, i32 4)
+ ret i64 %res
+}
+
+
+declare i64 @llvm.aarch64.sve.cntp.c8(target("aarch64.svcount"), i32)
+declare i64 @llvm.aarch64.sve.cntp.c16(target("aarch64.svcount"), i32)
+declare i64 @llvm.aarch64.sve.cntp.c32(target("aarch64.svcount"), i32)
+declare i64 @llvm.aarch64.sve.cntp.c64(target("aarch64.svcount"), i32)
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-predicate-as-counter.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-predicate-as-counter.ll
new file mode 100644
index 0000000000000..49d323c37f576
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-predicate-as-counter.ll
@@ -0,0 +1,48 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64 -mattr=+sve2p1 < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64 -mattr=+sme2 < %s | FileCheck %s
+
+define target("aarch64.svcount") @ptrue_b() nounwind {
+; CHECK-LABEL: ptrue_b:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue pn8.b
+; CHECK-NEXT: mov p0.b, p8.b
+; CHECK-NEXT: ret
+ %res = call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8()
+ ret target("aarch64.svcount") %res
+}
+
+define target("aarch64.svcount") @ptrue_h() nounwind {
+; CHECK-LABEL: ptrue_h:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue pn8.h
+; CHECK-NEXT: mov p0.b, p8.b
+; CHECK-NEXT: ret
+ %res = call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c16()
+ ret target("aarch64.svcount") %res
+}
+
+define target("aarch64.svcount") @ptrue_s() nounwind {
+; CHECK-LABEL: ptrue_s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue pn8.s
+; CHECK-NEXT: mov p0.b, p8.b
+; CHECK-NEXT: ret
+ %res = call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c32()
+ ret target("aarch64.svcount") %res
+}
+
+define target("aarch64.svcount") @ptrue_d() nounwind {
+; CHECK-LABEL: ptrue_d:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue pn8.d
+; CHECK-NEXT: mov p0.b, p8.b
+; CHECK-NEXT: ret
+ %res = call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c64()
+ ret target("aarch64.svcount") %res
+}
+
+declare target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8()
+declare target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c16()
+declare target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c32()
+declare target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c64()
More information about the llvm-commits
mailing list