[llvm] [AArch64] optimise SVE prefetch intrinsics with no active lanes (PR #103052)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 14 06:18:32 PDT 2024
https://github.com/Lukacma updated https://github.com/llvm/llvm-project/pull/103052
From f3771db723e013442846a94365c179eacea988c6 Mon Sep 17 00:00:00 2001
From: Marian Lukac <Marian.Lukac at arm.com>
Date: Tue, 13 Aug 2024 12:42:01 +0000
Subject: [PATCH] [AArch64] optimise SVE prefetch intrinsics with no active
lanes
---
.../AArch64/AArch64TargetTransformInfo.cpp | 18 ++
.../sve-intrinsic-comb-no-active-lanes-prf.ll | 156 ++++++++++++++++++
2 files changed, 174 insertions(+)
create mode 100644 llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-prf.ll
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 49b462668b6306..b430e738110b83 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -2157,6 +2157,24 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
case Intrinsic::aarch64_sve_ldnt1_gather_scalar_offset:
case Intrinsic::aarch64_sve_ldnt1_gather_uxtw:
return instCombineSVENoActiveUnaryZero(IC, II);
+ case Intrinsic::aarch64_sve_prf:
+ case Intrinsic::aarch64_sve_prfb_gather_index:
+ case Intrinsic::aarch64_sve_prfb_gather_scalar_offset:
+ case Intrinsic::aarch64_sve_prfb_gather_sxtw_index:
+ case Intrinsic::aarch64_sve_prfb_gather_uxtw_index:
+ case Intrinsic::aarch64_sve_prfd_gather_index:
+ case Intrinsic::aarch64_sve_prfd_gather_scalar_offset:
+ case Intrinsic::aarch64_sve_prfd_gather_sxtw_index:
+ case Intrinsic::aarch64_sve_prfd_gather_uxtw_index:
+ case Intrinsic::aarch64_sve_prfh_gather_index:
+ case Intrinsic::aarch64_sve_prfh_gather_scalar_offset:
+ case Intrinsic::aarch64_sve_prfh_gather_sxtw_index:
+ case Intrinsic::aarch64_sve_prfh_gather_uxtw_index:
+ case Intrinsic::aarch64_sve_prfw_gather_index:
+ case Intrinsic::aarch64_sve_prfw_gather_scalar_offset:
+ case Intrinsic::aarch64_sve_prfw_gather_sxtw_index:
+ case Intrinsic::aarch64_sve_prfw_gather_uxtw_index:
+ return instCombineSVENoActiveUnaryErase(IC, II, 0);
case Intrinsic::aarch64_neon_fmaxnm:
case Intrinsic::aarch64_neon_fminnm:
return instCombineMaxMinNM(IC, II);
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-prf.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-prf.ll
new file mode 100644
index 00000000000000..1de582dff52dad
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-prf.ll
@@ -0,0 +1,156 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=instcombine < %s | FileCheck %s
+target triple = "aarch64-unknown-linux-gnu"
+
+define void @test_prf(ptr %base){
+; CHECK-LABEL: define void @test_prf(
+; CHECK-SAME: ptr [[BASE:%.*]]) {
+; CHECK-NEXT: ret void
+;
+ tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> zeroinitializer, ptr %base, i32 1)
+ ret void
+}
+
+define void @test_prfb_gather_index(ptr %base, <vscale x 2 x i64> %indexes){
+; CHECK-LABEL: define void @test_prfb_gather_index(
+; CHECK-SAME: ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDEXES:%.*]]) {
+; CHECK-NEXT: ret void
+;
+ call void @llvm.aarch64.sve.prfb.gather.index.nxv2i64(<vscale x 2 x i1> zeroinitializer, ptr %base, <vscale x 2 x i64> %indexes, i32 1)
+ ret void
+}
+
+define void @test_prfb_gather_scalar_offset(<vscale x 4 x i32> %bases){
+; CHECK-LABEL: define void @test_prfb_gather_scalar_offset(
+; CHECK-SAME: <vscale x 4 x i32> [[BASES:%.*]]) {
+; CHECK-NEXT: ret void
+;
+ call void @llvm.aarch64.sve.prfb.gather.scalar.offset.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %bases, i64 7, i32 1)
+ ret void
+}
+
+define void @test_prfb_gather_sxtw_index(ptr %base, <vscale x 4 x i32> %indexes){
+; CHECK-LABEL: define void @test_prfb_gather_sxtw_index(
+; CHECK-SAME: ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDEXES:%.*]]) {
+; CHECK-NEXT: ret void
+;
+ call void @llvm.aarch64.sve.prfb.gather.sxtw.index.nxv4i32(<vscale x 4 x i1> zeroinitializer, ptr %base, <vscale x 4 x i32> %indexes, i32 1)
+ ret void
+}
+
+define void @test_prfb_gather_uxtw_index(ptr %base, <vscale x 4 x i32> %indexes){
+; CHECK-LABEL: define void @test_prfb_gather_uxtw_index(
+; CHECK-SAME: ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDEXES:%.*]]) {
+; CHECK-NEXT: ret void
+;
+ call void @llvm.aarch64.sve.prfb.gather.uxtw.index.nxv4i32(<vscale x 4 x i1> zeroinitializer, ptr %base, <vscale x 4 x i32> %indexes, i32 1)
+ ret void
+}
+
+define void @test_prfd_gather_index(ptr %base, <vscale x 2 x i64> %indexes){
+; CHECK-LABEL: define void @test_prfd_gather_index(
+; CHECK-SAME: ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDEXES:%.*]]) {
+; CHECK-NEXT: ret void
+;
+ call void @llvm.aarch64.sve.prfd.gather.index.nxv2i64(<vscale x 2 x i1> zeroinitializer, ptr %base, <vscale x 2 x i64> %indexes, i32 1)
+ ret void
+}
+
+define void @test_prfd_gather_scalar_offset(<vscale x 4 x i32> %bases){
+; CHECK-LABEL: define void @test_prfd_gather_scalar_offset(
+; CHECK-SAME: <vscale x 4 x i32> [[BASES:%.*]]) {
+; CHECK-NEXT: ret void
+;
+ call void @llvm.aarch64.sve.prfd.gather.scalar.offset.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %bases, i64 7, i32 1)
+ ret void
+}
+
+define void @test_prfd_gather_sxtw_index(ptr %base, <vscale x 4 x i32> %indexes){
+; CHECK-LABEL: define void @test_prfd_gather_sxtw_index(
+; CHECK-SAME: ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDEXES:%.*]]) {
+; CHECK-NEXT: ret void
+;
+ call void @llvm.aarch64.sve.prfd.gather.sxtw.index.nxv4i32(<vscale x 4 x i1> zeroinitializer, ptr %base, <vscale x 4 x i32> %indexes, i32 1)
+ ret void
+}
+
+define void @test_prfd_gather_uxtw_index(ptr %base, <vscale x 4 x i32> %indexes){
+; CHECK-LABEL: define void @test_prfd_gather_uxtw_index(
+; CHECK-SAME: ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDEXES:%.*]]) {
+; CHECK-NEXT: ret void
+;
+ call void @llvm.aarch64.sve.prfd.gather.uxtw.index.nxv4i32(<vscale x 4 x i1> zeroinitializer, ptr %base, <vscale x 4 x i32> %indexes, i32 1)
+ ret void
+}
+
+define void @test_prfh_gather_index(ptr %base, <vscale x 2 x i64> %indexes){
+; CHECK-LABEL: define void @test_prfh_gather_index(
+; CHECK-SAME: ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDEXES:%.*]]) {
+; CHECK-NEXT: ret void
+;
+ call void @llvm.aarch64.sve.prfh.gather.index.nxv2i64(<vscale x 2 x i1> zeroinitializer, ptr %base, <vscale x 2 x i64> %indexes, i32 1)
+ ret void
+}
+
+define void @test_prfh_gather_scalar_offset(<vscale x 4 x i32> %bases){
+; CHECK-LABEL: define void @test_prfh_gather_scalar_offset(
+; CHECK-SAME: <vscale x 4 x i32> [[BASES:%.*]]) {
+; CHECK-NEXT: ret void
+;
+ call void @llvm.aarch64.sve.prfh.gather.scalar.offset.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %bases, i64 7, i32 1)
+ ret void
+}
+
+define void @test_prfh_gather_sxtw_index(ptr %base, <vscale x 4 x i32> %indexes){
+; CHECK-LABEL: define void @test_prfh_gather_sxtw_index(
+; CHECK-SAME: ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDEXES:%.*]]) {
+; CHECK-NEXT: ret void
+;
+ call void @llvm.aarch64.sve.prfh.gather.sxtw.index.nxv4i32(<vscale x 4 x i1> zeroinitializer, ptr %base, <vscale x 4 x i32> %indexes, i32 1)
+ ret void
+}
+
+define void @test_prfh_gather_uxtw_index(ptr %base, <vscale x 4 x i32> %indexes){
+; CHECK-LABEL: define void @test_prfh_gather_uxtw_index(
+; CHECK-SAME: ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDEXES:%.*]]) {
+; CHECK-NEXT: ret void
+;
+ call void @llvm.aarch64.sve.prfh.gather.uxtw.index.nxv4i32(<vscale x 4 x i1> zeroinitializer, ptr %base, <vscale x 4 x i32> %indexes, i32 1)
+ ret void
+}
+
+define void @test_prfw_gather_index(ptr %base, <vscale x 2 x i64> %indexes){
+; CHECK-LABEL: define void @test_prfw_gather_index(
+; CHECK-SAME: ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDEXES:%.*]]) {
+; CHECK-NEXT: ret void
+;
+ call void @llvm.aarch64.sve.prfw.gather.index.nxv2i64(<vscale x 2 x i1> zeroinitializer, ptr %base, <vscale x 2 x i64> %indexes, i32 1)
+ ret void
+}
+
+define void @test_prfw_gather_scalar_offset(<vscale x 4 x i32> %bases){
+; CHECK-LABEL: define void @test_prfw_gather_scalar_offset(
+; CHECK-SAME: <vscale x 4 x i32> [[BASES:%.*]]) {
+; CHECK-NEXT: ret void
+;
+ call void @llvm.aarch64.sve.prfw.gather.scalar.offset.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %bases, i64 7, i32 1)
+ ret void
+}
+
+define void @test_prfw_gather_sxtw_index(ptr %base, <vscale x 4 x i32> %indexes){
+; CHECK-LABEL: define void @test_prfw_gather_sxtw_index(
+; CHECK-SAME: ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDEXES:%.*]]) {
+; CHECK-NEXT: ret void
+;
+ call void @llvm.aarch64.sve.prfw.gather.sxtw.index.nxv4i32(<vscale x 4 x i1> zeroinitializer, ptr %base, <vscale x 4 x i32> %indexes, i32 1)
+ ret void
+}
+
+define void @test_prfw_gather_uxtw_index(ptr %base, <vscale x 4 x i32> %indexes){
+; CHECK-LABEL: define void @test_prfw_gather_uxtw_index(
+; CHECK-SAME: ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDEXES:%.*]]) {
+; CHECK-NEXT: ret void
+;
+ call void @llvm.aarch64.sve.prfw.gather.uxtw.index.nxv4i32(<vscale x 4 x i1> zeroinitializer, ptr %base, <vscale x 4 x i32> %indexes, i32 1)
+ ret void
+}
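For contrast, a prefetch whose predicate is not provably all-false must survive InstCombine. A hypothetical negative test (not included in the patch) would look like this and be left unchanged:

    ; Hypothetical example, not part of the patch: %pg is not a known
    ; all-false constant, so the combine does not fire.
    define void @test_prf_not_erased(<vscale x 16 x i1> %pg, ptr %base) {
      tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, ptr %base, i32 1)
      ret void
    }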