[clang] 42a56bf - [SveEmitter] Add builtins for gather prefetches
Sander de Smalen via cfe-commits
cfe-commits at lists.llvm.org
Wed Apr 29 03:54:23 PDT 2020
Author: Sander de Smalen
Date: 2020-04-29T11:52:49+01:00
New Revision: 42a56bf63f699a620a57c34474510d9937ebf715
URL: https://github.com/llvm/llvm-project/commit/42a56bf63f699a620a57c34474510d9937ebf715
DIFF: https://github.com/llvm/llvm-project/commit/42a56bf63f699a620a57c34474510d9937ebf715.diff
LOG: [SveEmitter] Add builtins for gather prefetches
Patch by Andrzej Warzynski
Reviewed By: efriedma
Tags: #clang
Differential Revision: https://reviews.llvm.org/D78677
Added:
Modified:
clang/include/clang/Basic/TargetBuiltins.h
clang/include/clang/Basic/arm_sve.td
clang/lib/CodeGen/CGBuiltin.cpp
clang/lib/CodeGen/CodeGenFunction.h
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfb.c
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfd.c
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfh.c
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfw.c
clang/utils/TableGen/SveEmitter.cpp
Removed:
################################################################################
diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h
index 0a06ba3e5ecc..bf07a8950f28 100644
--- a/clang/include/clang/Basic/TargetBuiltins.h
+++ b/clang/include/clang/Basic/TargetBuiltins.h
@@ -242,6 +242,7 @@ namespace clang {
bool isReverseCompare() const { return Flags & ReverseCompare; }
bool isAppendSVALL() const { return Flags & IsAppendSVALL; }
bool isInsertOp1SVALL() const { return Flags & IsInsertOp1SVALL; }
+ bool isGatherPrefetch() const { return Flags & IsGatherPrefetch; }
uint64_t getBits() const { return Flags; }
bool isFlagSet(uint64_t Flag) const { return Flags & Flag; }
diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index 20a055bc6d38..a028487a96b9 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -98,6 +98,8 @@
// G: pointer to uint32_t
// H: pointer to uint64_t
+// Q: const pointer to void
+
// S: const pointer to int8_t
// T: const pointer to int16_t
// U: const pointer to int32_t
@@ -185,6 +187,7 @@ def IsByteIndexed : FlagType<0x01000000>;
def IsAppendSVALL : FlagType<0x02000000>; // Appends SV_ALL as the last operand.
def IsInsertOp1SVALL : FlagType<0x04000000>; // Inserts SV_ALL as the second operand.
def IsPrefetch : FlagType<0x08000000>; // Contiguous prefetches.
+def IsGatherPrefetch : FlagType<0x10000000>;
def ReverseCompare : FlagType<0x20000000>; // Compare operands must be swapped.
// These must be kept in sync with the flags in include/clang/Basic/TargetBuiltins.h
@@ -569,6 +572,39 @@ def SVPRFH_VNUM : MInst<"svprfh_vnum", "vPclJ", "s", [IsPrefetch], MemEltTyInt16
def SVPRFW_VNUM : MInst<"svprfw_vnum", "vPclJ", "i", [IsPrefetch], MemEltTyInt32, "aarch64_sve_prf">;
def SVPRFD_VNUM : MInst<"svprfd_vnum", "vPclJ", "l", [IsPrefetch], MemEltTyInt64, "aarch64_sve_prf">;
+// Prefetch (Vector bases)
+def SVPRFB_GATHER_BASES : MInst<"svprfb_gather[_{2}base]", "vPdJ", "UiUl", [IsGatherPrefetch], MemEltTyInt8, "aarch64_sve_prfb_gather_scalar_offset">;
+def SVPRFH_GATHER_BASES : MInst<"svprfh_gather[_{2}base]", "vPdJ", "UiUl", [IsGatherPrefetch], MemEltTyInt16, "aarch64_sve_prfh_gather_scalar_offset">;
+def SVPRFW_GATHER_BASES : MInst<"svprfw_gather[_{2}base]", "vPdJ", "UiUl", [IsGatherPrefetch], MemEltTyInt32, "aarch64_sve_prfw_gather_scalar_offset">;
+def SVPRFD_GATHER_BASES : MInst<"svprfd_gather[_{2}base]", "vPdJ", "UiUl", [IsGatherPrefetch], MemEltTyInt64, "aarch64_sve_prfd_gather_scalar_offset">;
+
+// Prefetch (Scalar base, Vector offsets)
+def SVPRFB_GATHER_32B_OFFSETS_S : MInst<"svprfb_gather_[{3}]offset", "vPQdJ", "i", [IsGatherPrefetch], MemEltTyInt8, "aarch64_sve_prfb_gather_sxtw_index">;
+def SVPRFH_GATHER_32B_OFFSETS_S : MInst<"svprfh_gather_[{3}]index", "vPQdJ", "i", [IsGatherPrefetch], MemEltTyInt16, "aarch64_sve_prfh_gather_sxtw_index">;
+def SVPRFW_GATHER_32B_OFFSETS_S : MInst<"svprfw_gather_[{3}]index", "vPQdJ", "i", [IsGatherPrefetch], MemEltTyInt32, "aarch64_sve_prfw_gather_sxtw_index">;
+def SVPRFD_GATHER_32B_OFFSETS_S : MInst<"svprfd_gather_[{3}]index", "vPQdJ", "i", [IsGatherPrefetch], MemEltTyInt64, "aarch64_sve_prfd_gather_sxtw_index">;
+
+def SVPRFB_GATHER_64B_OFFSETS_S : MInst<"svprfb_gather_[{3}]offset", "vPQdJ", "l", [IsGatherPrefetch], MemEltTyInt8, "aarch64_sve_prfb_gather_index">;
+def SVPRFH_GATHER_64B_OFFSETS_S : MInst<"svprfh_gather_[{3}]index", "vPQdJ", "l", [IsGatherPrefetch], MemEltTyInt16, "aarch64_sve_prfh_gather_index">;
+def SVPRFW_GATHER_64B_OFFSETS_S : MInst<"svprfw_gather_[{3}]index", "vPQdJ", "l", [IsGatherPrefetch], MemEltTyInt32, "aarch64_sve_prfw_gather_index">;
+def SVPRFD_GATHER_64B_OFFSETS_S : MInst<"svprfd_gather_[{3}]index", "vPQdJ", "l", [IsGatherPrefetch], MemEltTyInt64, "aarch64_sve_prfd_gather_index">;
+
+def SVPRFB_GATHER_32B_OFFSETS_U : MInst<"svprfb_gather_[{3}]offset", "vPQdJ", "Ui", [IsGatherPrefetch], MemEltTyInt8, "aarch64_sve_prfb_gather_uxtw_index">;
+def SVPRFH_GATHER_32B_OFFSETS_U : MInst<"svprfh_gather_[{3}]index", "vPQdJ", "Ui", [IsGatherPrefetch], MemEltTyInt16, "aarch64_sve_prfh_gather_uxtw_index">;
+def SVPRFW_GATHER_32B_OFFSETS_U : MInst<"svprfw_gather_[{3}]index", "vPQdJ", "Ui", [IsGatherPrefetch], MemEltTyInt32, "aarch64_sve_prfw_gather_uxtw_index">;
+def SVPRFD_GATHER_32B_OFFSETS_U : MInst<"svprfd_gather_[{3}]index", "vPQdJ", "Ui", [IsGatherPrefetch], MemEltTyInt64, "aarch64_sve_prfd_gather_uxtw_index">;
+
+def SVPRFB_GATHER_64B_OFFSETS_U : MInst<"svprfb_gather_[{3}]offset", "vPQdJ", "Ul", [IsGatherPrefetch], MemEltTyInt8, "aarch64_sve_prfb_gather_index">;
+def SVPRFH_GATHER_64B_OFFSETS_U : MInst<"svprfh_gather_[{3}]index", "vPQdJ", "Ul", [IsGatherPrefetch], MemEltTyInt16, "aarch64_sve_prfh_gather_index">;
+def SVPRFW_GATHER_64B_OFFSETS_U : MInst<"svprfw_gather_[{3}]index", "vPQdJ", "Ul", [IsGatherPrefetch], MemEltTyInt32, "aarch64_sve_prfw_gather_index">;
+def SVPRFD_GATHER_64B_OFFSETS_U : MInst<"svprfd_gather_[{3}]index", "vPQdJ", "Ul", [IsGatherPrefetch], MemEltTyInt64, "aarch64_sve_prfd_gather_index">;
+
+// Prefetch (Vector bases, scalar offset)
+def SVPRFB_GATHER_BASES_OFFSET : MInst<"svprfb_gather[_{2}base]_offset", "vPdlJ", "UiUl", [IsGatherPrefetch], MemEltTyInt8, "aarch64_sve_prfb_gather_scalar_offset">;
+def SVPRFH_GATHER_BASES_OFFSET : MInst<"svprfh_gather[_{2}base]_index", "vPdlJ", "UiUl", [IsGatherPrefetch], MemEltTyInt16, "aarch64_sve_prfh_gather_scalar_offset">;
+def SVPRFW_GATHER_BASES_OFFSET : MInst<"svprfw_gather[_{2}base]_index", "vPdlJ", "UiUl", [IsGatherPrefetch], MemEltTyInt32, "aarch64_sve_prfw_gather_scalar_offset">;
+def SVPRFD_GATHER_BASES_OFFSET : MInst<"svprfd_gather[_{2}base]_index", "vPdlJ", "UiUl", [IsGatherPrefetch], MemEltTyInt64, "aarch64_sve_prfd_gather_scalar_offset">;
+
////////////////////////////////////////////////////////////////////////////////
// Integer arithmetic
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 95ef19e36607..e0b7e8a2dd78 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -7744,6 +7744,39 @@ Value *CodeGenFunction::EmitSVEScatterStore(SVETypeFlags TypeFlags,
return Builder.CreateCall(F, Ops);
}
+Value *CodeGenFunction::EmitSVEGatherPrefetch(SVETypeFlags TypeFlags,
+ SmallVectorImpl<Value *> &Ops,
+ unsigned IntID) {
+ // The gather prefetches are overloaded on the vector input - this can either
+ // be the vector of base addresses or vector of offsets.
+ auto *OverloadedTy = dyn_cast<llvm::ScalableVectorType>(Ops[1]->getType());
+ if (!OverloadedTy)
+ OverloadedTy = cast<llvm::ScalableVectorType>(Ops[2]->getType());
+
+ // Cast the predicate from svbool_t to the right number of elements.
+ Ops[0] = EmitSVEPredicateCast(Ops[0], OverloadedTy);
+
+ // vector + imm addressing modes
+ if (Ops[1]->getType()->isVectorTy()) {
+ if (Ops.size() == 3) {
+ // Pass 0 for 'vector+imm' when the index is omitted.
+ Ops.push_back(ConstantInt::get(Int64Ty, 0));
+
+ // The sv_prfop is the last operand in the builtin and IR intrinsic.
+ std::swap(Ops[2], Ops[3]);
+ } else {
+ // Index needs to be passed as scaled offset.
+ llvm::Type *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
+ unsigned BytesPerElt = MemEltTy->getPrimitiveSizeInBits() / 8;
+ Value *Scale = ConstantInt::get(Int64Ty, BytesPerElt);
+ Ops[2] = Builder.CreateMul(Ops[2], Scale);
+ }
+ }
+
+ Function *F = CGM.getIntrinsic(IntID, OverloadedTy);
+ return Builder.CreateCall(F, Ops);
+}
+
Value *CodeGenFunction::EmitSVEPrefetchLoad(SVETypeFlags TypeFlags,
SmallVectorImpl<Value *> &Ops,
unsigned BuiltinID) {
@@ -7903,6 +7936,8 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
return EmitSVEScatterStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
else if (TypeFlags.isPrefetch())
return EmitSVEPrefetchLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
+ else if (TypeFlags.isGatherPrefetch())
+ return EmitSVEGatherPrefetch(TypeFlags, Ops, Builtin->LLVMIntrinsic);
else if (Builtin->LLVMIntrinsic != 0) {
if (TypeFlags.getMergeType() == SVETypeFlags::MergeZeroExp)
InsertExplicitZeroOperand(Builder, Ty, Ops);
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 0cc626bf5ecc..0e09eb75e8ad 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -3936,6 +3936,9 @@ class CodeGenFunction : public CodeGenTypeCache {
llvm::Value *EmitSVEPrefetchLoad(SVETypeFlags TypeFlags,
SmallVectorImpl<llvm::Value *> &Ops,
unsigned BuiltinID);
+ llvm::Value *EmitSVEGatherPrefetch(SVETypeFlags TypeFlags,
+ SmallVectorImpl<llvm::Value *> &Ops,
+ unsigned IntID);
llvm::Value *EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
llvm::Value *EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfb.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfb.c
index f29cb995230c..a361d3ab7d08 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfb.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfb.c
@@ -102,3 +102,76 @@ void test_svprfb_vnum(svbool_t pg, const void *base, int64_t vnum)
// CHECK: @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %[[GEP]], i32 0)
return svprfb_vnum(pg, base, vnum, SV_PLDL1KEEP);
}
+
+void test_svprfb_gather_u32base(svbool_t pg, svuint32_t bases)
+{
+ // CHECK-LABEL: test_svprfb_gather_u32base
+ // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+ // CHECK: call void @llvm.aarch64.sve.prfb.gather.scalar.offset.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %bases, i64 0, i32 0)
+ // CHECK: ret void
+ return SVE_ACLE_FUNC(svprfb_gather,_u32base,,)(pg, bases, SV_PLDL1KEEP);
+}
+
+void test_svprfb_gather_u64base(svbool_t pg, svuint64_t bases)
+{
+ // CHECK-LABEL: test_svprfb_gather_u64base
+ // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+ // CHECK: call void @llvm.aarch64.sve.prfb.gather.scalar.offset.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 0, i32 0)
+ // CHECK: ret void
+ return SVE_ACLE_FUNC(svprfb_gather,_u64base,,)(pg, bases, SV_PLDL1KEEP);
+}
+
+void test_svprfb_gather_s32offset(svbool_t pg, const void *base, svint32_t offsets)
+{
+ // CHECK-LABEL: test_svprfb_gather_s32offset
+ // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+ // CHECK: call void @llvm.aarch64.sve.prfb.gather.sxtw.index.nxv4i32(<vscale x 4 x i1> %[[PG]], i8* %base, <vscale x 4 x i32> %offsets, i32 0)
+ // CHECK: ret void
+ return SVE_ACLE_FUNC(svprfb_gather_,s32,offset,)(pg, base, offsets, SV_PLDL1KEEP);
+}
+
+void test_svprfb_gather_s64offset(svbool_t pg, const void *base, svint64_t offsets)
+{
+ // CHECK-LABEL: test_svprfb_gather_s64offset
+ // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+ // CHECK: call void @llvm.aarch64.sve.prfb.gather.index.nxv2i64(<vscale x 2 x i1> %[[PG]], i8* %base, <vscale x 2 x i64> %offsets, i32 0)
+ // CHECK: ret void
+ return SVE_ACLE_FUNC(svprfb_gather_,s64,offset,)(pg, base, offsets, SV_PLDL1KEEP);
+}
+
+void test_svprfb_gather_u32offset(svbool_t pg, const void *base, svuint32_t offsets)
+{
+ // CHECK-LABEL: test_svprfb_gather_u32offset
+ // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+ // CHECK: call void @llvm.aarch64.sve.prfb.gather.uxtw.index.nxv4i32(<vscale x 4 x i1> %[[PG]], i8* %base, <vscale x 4 x i32> %offsets, i32 0)
+ // CHECK: ret void
+ return SVE_ACLE_FUNC(svprfb_gather_,u32,offset,)(pg, base, offsets, SV_PLDL1KEEP);
+}
+
+void test_svprfb_gather_u64offset(svbool_t pg, const void *base, svuint64_t offsets)
+{
+ // CHECK-LABEL: test_svprfb_gather_u64offset
+ // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+ // CHECK: call void @llvm.aarch64.sve.prfb.gather.index.nxv2i64(<vscale x 2 x i1> %[[PG]], i8* %base, <vscale x 2 x i64> %offsets, i32 0)
+ // CHECK: ret void
+ return SVE_ACLE_FUNC(svprfb_gather_,u64,offset,)(pg, base, offsets, SV_PLDL1KEEP);
+}
+
+void test_svprfb_gather_u32base_offset(svbool_t pg, svuint32_t bases, int64_t offset)
+{
+ // CHECK-LABEL: test_svprfb_gather_u32base_offset
+ // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+ // CHECK: call void @llvm.aarch64.sve.prfb.gather.scalar.offset.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %bases, i64 %offset, i32 0)
+ // CHECK: ret void
+ return svprfb_gather_u32base_offset(pg, bases, offset, SV_PLDL1KEEP);
+ return SVE_ACLE_FUNC(svprfb_gather,_u32base,_offset,)(pg, bases, offset, SV_PLDL1KEEP);
+}
+
+void test_svprfb_gather_u64base_offset(svbool_t pg, svuint64_t bases, int64_t offset)
+{
+ // CHECK-LABEL: test_svprfb_gather_u64base_offset
+ // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+ // CHECK: call void @llvm.aarch64.sve.prfb.gather.scalar.offset.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 %offset, i32 0)
+ // CHECK: ret void
+ return SVE_ACLE_FUNC(svprfb_gather,_u64base,_offset,)(pg, bases, offset, SV_PLDL1KEEP);
+}
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfd.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfd.c
index 59d3fbfb7e1b..b7dde06e397f 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfd.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfd.c
@@ -116,3 +116,77 @@ void test_svprfd_vnum(svbool_t pg, const void *base, int64_t vnum)
// CHECK: @llvm.aarch64.sve.prf.nxv2i1(<vscale x 2 x i1> %[[PG]], i8* %[[I8_BASE]], i32 0)
return svprfd_vnum(pg, base, vnum, SV_PLDL1KEEP);
}
+
+void test_svprfd_gather_u32base(svbool_t pg, svuint32_t bases)
+{
+ // CHECK-LABEL: test_svprfd_gather_u32base
+ // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+ // CHECK: call void @llvm.aarch64.sve.prfd.gather.scalar.offset.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %bases, i64 0, i32 0)
+ // CHECK: ret void
+ return SVE_ACLE_FUNC(svprfd_gather,_u32base,,)(pg, bases, SV_PLDL1KEEP);
+}
+
+void test_svprfd_gather_u64base(svbool_t pg, svuint64_t bases)
+{
+ // CHECK-LABEL: test_svprfd_gather_u64base
+ // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+ // CHECK: call void @llvm.aarch64.sve.prfd.gather.scalar.offset.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 0, i32 0)
+ // CHECK: ret void
+ return SVE_ACLE_FUNC(svprfd_gather,_u64base,,)(pg, bases, SV_PLDL1KEEP);
+}
+
+void test_svprfd_gather_s32index(svbool_t pg, const void *base, svint32_t indices)
+{
+ // CHECK-LABEL: test_svprfd_gather_s32index
+ // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+ // CHECK: call void @llvm.aarch64.sve.prfd.gather.sxtw.index.nxv4i32(<vscale x 4 x i1> %[[PG]], i8* %base, <vscale x 4 x i32> %indices, i32 0)
+ // CHECK: ret void
+ return SVE_ACLE_FUNC(svprfd_gather_,s32,index,)(pg, base, indices, SV_PLDL1KEEP);
+}
+
+void test_svprfd_gather_s64index(svbool_t pg, const void *base, svint64_t indices)
+{
+ // CHECK-LABEL: test_svprfd_gather_s64index
+ // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+ // CHECK: call void @llvm.aarch64.sve.prfd.gather.index.nxv2i64(<vscale x 2 x i1> %[[PG]], i8* %base, <vscale x 2 x i64> %indices, i32 0)
+ // CHECK: ret void
+ return SVE_ACLE_FUNC(svprfd_gather_,s64,index,)(pg, base, indices, SV_PLDL1KEEP);
+}
+
+void test_svprfd_gather_u32index(svbool_t pg, const void *base, svuint32_t indices)
+{
+ // CHECK-LABEL: test_svprfd_gather_u32index
+ // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+ // CHECK: call void @llvm.aarch64.sve.prfd.gather.uxtw.index.nxv4i32(<vscale x 4 x i1> %[[PG]], i8* %base, <vscale x 4 x i32> %indices, i32 0)
+ // CHECK: ret void
+ return SVE_ACLE_FUNC(svprfd_gather_,u32,index,)(pg, base, indices, SV_PLDL1KEEP);
+}
+
+void test_svprfd_gather_u64index(svbool_t pg, const void *base, svuint64_t indices)
+{
+ // CHECK-LABEL: test_svprfd_gather_u64index
+ // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+ // CHECK: call void @llvm.aarch64.sve.prfd.gather.index.nxv2i64(<vscale x 2 x i1> %[[PG]], i8* %base, <vscale x 2 x i64> %indices, i32 0)
+ // CHECK: ret void
+ return SVE_ACLE_FUNC(svprfd_gather_,u64,index,)(pg, base, indices, SV_PLDL1KEEP);
+}
+
+void test_svprfd_gather_u32base_index(svbool_t pg, svuint32_t bases, int64_t index)
+{
+ // CHECK-LABEL: test_svprfd_gather_u32base_index
+ // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+ // CHECK-DAG: %[[SHL:.*]] = shl i64 %index, 3
+ // CHECK: call void @llvm.aarch64.sve.prfd.gather.scalar.offset.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %bases, i64 %[[SHL]], i32 0)
+ // CHECK: ret void
+ return SVE_ACLE_FUNC(svprfd_gather,_u32base,_index,)(pg, bases, index, SV_PLDL1KEEP);
+}
+
+void test_svprfd_gather_u64base_index(svbool_t pg, svuint64_t bases, int64_t index)
+{
+ // CHECK-LABEL: test_svprfd_gather_u64base_index
+ // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+ // CHECK-DAG: %[[SHL:.*]] = shl i64 %index, 3
+ // CHECK: call void @llvm.aarch64.sve.prfd.gather.scalar.offset.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 %[[SHL]], i32 0)
+ // CHECK: ret void
+ return SVE_ACLE_FUNC(svprfd_gather,_u64base,_index,)(pg, bases, index, SV_PLDL1KEEP);
+}
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfh.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfh.c
index a6290f3f2f0c..e988448ad93e 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfh.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfh.c
@@ -116,3 +116,75 @@ void test_svprfh_vnum(svbool_t pg, const void *base, int64_t vnum)
// CHECK: @llvm.aarch64.sve.prf.nxv8i1(<vscale x 8 x i1> %[[PG]], i8* %[[I8_BASE]], i32 0)
return svprfh_vnum(pg, base, vnum, SV_PLDL1KEEP);
}
+
+void test_svprfh_gather_u32base(svbool_t pg, svuint32_t bases)
+{
+ // CHECK-LABEL: test_svprfh_gather_u32base
+ // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+ // CHECK: call void @llvm.aarch64.sve.prfh.gather.scalar.offset.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %bases, i64 0, i32 0)
+ // CHECK: ret void
+ return SVE_ACLE_FUNC(svprfh_gather,_u32base,,)(pg, bases, SV_PLDL1KEEP);
+}
+
+void test_svprfh_gather_u64base(svbool_t pg, svuint64_t bases)
+{
+ // CHECK-LABEL: test_svprfh_gather_u64base
+ // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+ // CHECK: call void @llvm.aarch64.sve.prfh.gather.scalar.offset.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 0, i32 0)
+ return SVE_ACLE_FUNC(svprfh_gather,_u64base,,)(pg, bases, SV_PLDL1KEEP);
+}
+
+void test_svprfh_gather_s32index(svbool_t pg, const void *base, svint32_t indices)
+{
+ // CHECK-LABEL: test_svprfh_gather_s32index
+ // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+ // CHECK: call void @llvm.aarch64.sve.prfh.gather.sxtw.index.nxv4i32(<vscale x 4 x i1> %[[PG]], i8* %base, <vscale x 4 x i32> %indices, i32 0)
+ // CHECK: ret void
+ return SVE_ACLE_FUNC(svprfh_gather_,s32,index,)(pg, base, indices, SV_PLDL1KEEP);
+}
+
+void test_svprfh_gather_s64index(svbool_t pg, const void *base, svint64_t indices)
+{
+ // CHECK-LABEL: test_svprfh_gather_s64index
+ // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+ // CHECK: call void @llvm.aarch64.sve.prfh.gather.index.nxv2i64(<vscale x 2 x i1> %[[PG]], i8* %base, <vscale x 2 x i64> %indices, i32 0)
+ return SVE_ACLE_FUNC(svprfh_gather_,s64,index,)(pg, base, indices, SV_PLDL1KEEP);
+}
+
+void test_svprfh_gather_u32index(svbool_t pg, const void *base, svuint32_t indices)
+{
+ // CHECK-LABEL: test_svprfh_gather_u32index
+ // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+ // CHECK: call void @llvm.aarch64.sve.prfh.gather.uxtw.index.nxv4i32(<vscale x 4 x i1> %[[PG]], i8* %base, <vscale x 4 x i32> %indices, i32 0)
+ // CHECK: ret void
+ return SVE_ACLE_FUNC(svprfh_gather_,u32,index,)(pg, base, indices, SV_PLDL1KEEP);
+}
+
+void test_svprfh_gather_u64index(svbool_t pg, const void *base, svuint64_t indices)
+{
+ // CHECK-LABEL: test_svprfh_gather_u64index
+ // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+ // CHECK: call void @llvm.aarch64.sve.prfh.gather.index.nxv2i64(<vscale x 2 x i1> %[[PG]], i8* %base, <vscale x 2 x i64> %indices, i32 0)
+ // CHECK: ret void
+ return SVE_ACLE_FUNC(svprfh_gather_,u64,index,)(pg, base, indices, SV_PLDL1KEEP);
+}
+
+void test_svprfh_gather_u32base_index(svbool_t pg, svuint32_t bases, int64_t index)
+{
+ // CHECK-LABEL: test_svprfh_gather_u32base_index
+ // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+ // CHECK-DAG: %[[SHL:.*]] = shl i64 %index, 1
+ // CHECK: call void @llvm.aarch64.sve.prfh.gather.scalar.offset.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %bases, i64 %[[SHL]], i32 0)
+ // CHECK: ret void
+ return SVE_ACLE_FUNC(svprfh_gather,_u32base,_index,)(pg, bases, index, SV_PLDL1KEEP);
+}
+
+void test_svprfh_gather_u64base_index(svbool_t pg, svuint64_t bases, int64_t index)
+{
+ // CHECK-LABEL: test_svprfh_gather_u64base_index
+ // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+ // CHECK-DAG: %[[SHL:.*]] = shl i64 %index, 1
+ // CHECK: call void @llvm.aarch64.sve.prfh.gather.scalar.offset.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 %[[SHL]], i32 0)
+ // CHECK: ret void
+ return SVE_ACLE_FUNC(svprfh_gather,_u64base,_index,)(pg, bases, index, SV_PLDL1KEEP);
+}
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfw.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfw.c
index e6b1b603e475..d71767cad882 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfw.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfw.c
@@ -116,3 +116,77 @@ void test_svprfw_vnum(svbool_t pg, const void *base, int64_t vnum)
// CHECK: @llvm.aarch64.sve.prf.nxv4i1(<vscale x 4 x i1> %[[PG]], i8* %[[I8_BASE]], i32 0)
return svprfw_vnum(pg, base, vnum, SV_PLDL1KEEP);
}
+
+void test_svprfw_gather_u32base(svbool_t pg, svuint32_t bases)
+{
+ // CHECK-LABEL: test_svprfw_gather_u32base
+ // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+ // CHECK: call void @llvm.aarch64.sve.prfw.gather.scalar.offset.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %bases, i64 0, i32 0)
+ // CHECK: ret void
+ return SVE_ACLE_FUNC(svprfw_gather,_u32base,,)(pg, bases, SV_PLDL1KEEP);
+}
+
+void test_svprfw_gather_u64base(svbool_t pg, svuint64_t bases)
+{
+ // CHECK-LABEL: test_svprfw_gather_u64base
+ // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+ // CHECK: call void @llvm.aarch64.sve.prfw.gather.scalar.offset.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 0, i32 0)
+ // CHECK: ret void
+ return SVE_ACLE_FUNC(svprfw_gather,_u64base,,)(pg, bases, SV_PLDL1KEEP);
+}
+
+void test_svprfw_gather_s32index(svbool_t pg, const void *base, svint32_t indices)
+{
+ // CHECK-LABEL: test_svprfw_gather_s32index
+ // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+ // CHECK: call void @llvm.aarch64.sve.prfw.gather.sxtw.index.nxv4i32(<vscale x 4 x i1> %[[PG]], i8* %base, <vscale x 4 x i32> %indices, i32 0)
+ // CHECK: ret void
+ return SVE_ACLE_FUNC(svprfw_gather_,s32,index,)(pg, base, indices, SV_PLDL1KEEP);
+}
+
+void test_svprfw_gather_s64index(svbool_t pg, const void *base, svint64_t indices)
+{
+ // CHECK-LABEL: test_svprfw_gather_s64index
+ // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+ // CHECK: call void @llvm.aarch64.sve.prfw.gather.index.nxv2i64(<vscale x 2 x i1> %[[PG]], i8* %base, <vscale x 2 x i64> %indices, i32 0)
+ // CHECK: ret void
+ return SVE_ACLE_FUNC(svprfw_gather_,s64,index,)(pg, base, indices, SV_PLDL1KEEP);
+}
+
+void test_svprfw_gather_u32index(svbool_t pg, const void *base, svuint32_t indices)
+{
+ // CHECK-LABEL: test_svprfw_gather_u32index
+ // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+ // CHECK: call void @llvm.aarch64.sve.prfw.gather.uxtw.index.nxv4i32(<vscale x 4 x i1> %[[PG]], i8* %base, <vscale x 4 x i32> %indices, i32 0)
+ // CHECK: ret void
+ return SVE_ACLE_FUNC(svprfw_gather_,u32,index,)(pg, base, indices, SV_PLDL1KEEP);
+}
+
+void test_svprfw_gather_u64index(svbool_t pg, const void *base, svuint64_t indices)
+{
+ // CHECK-LABEL: test_svprfw_gather_u64index
+ // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+ // CHECK: call void @llvm.aarch64.sve.prfw.gather.index.nxv2i64(<vscale x 2 x i1> %[[PG]], i8* %base, <vscale x 2 x i64> %indices, i32 0)
+ // CHECK: ret void
+ return SVE_ACLE_FUNC(svprfw_gather_,u64,index,)(pg, base, indices, SV_PLDL1KEEP);
+}
+
+void test_svprfw_gather_u32base_index(svbool_t pg, svuint32_t bases, int64_t index)
+{
+ // CHECK-LABEL: test_svprfw_gather_u32base_index
+ // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+ // CHECK-DAG: %[[SHL:.*]] = shl i64 %index, 2
+ // CHECK: call void @llvm.aarch64.sve.prfw.gather.scalar.offset.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %bases, i64 %[[SHL]], i32 0)
+ // CHECK: ret void
+ return SVE_ACLE_FUNC(svprfw_gather,_u32base,_index,)(pg, bases, index, SV_PLDL1KEEP);
+}
+
+void test_svprfw_gather_u64base_index(svbool_t pg, svuint64_t bases, int64_t index)
+{
+ // CHECK-LABEL: test_svprfw_gather_u64base_index
+ // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+ // CHECK-DAG: %[[SHL:.*]] = shl i64 %index, 2
+ // CHECK: call void @llvm.aarch64.sve.prfw.gather.scalar.offset.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 %[[SHL]], i32 0)
+ // CHECK: ret void
+ return SVE_ACLE_FUNC(svprfw_gather,_u64base,_index,)(pg, bases, index, SV_PLDL1KEEP);
+}
diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp
index 9376cd63939f..a83450a76382 100644
--- a/clang/utils/TableGen/SveEmitter.cpp
+++ b/clang/utils/TableGen/SveEmitter.cpp
@@ -606,6 +606,12 @@ void SVEType::applyModifier(char Mod) {
Float = true;
ElementBitwidth = 64;
break;
+ case 'Q':
+ Constant = true;
+ Pointer = true;
+ Void = true;
+ NumVectors = 0;
+ break;
case 'S':
Constant = true;
Pointer = true;
More information about the cfe-commits
mailing list