[llvm] [AArch64] optimise SVE prefetch intrinsics with no active lanes (PR #103052)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 13 05:51:46 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: None (Lukacma)
<details>
<summary>Changes</summary>
This patch extends https://github.com/llvm/llvm-project/pull/73964 and optimises away SVE prefetch intrinsics when their governing predicate is all-false (zeroinitializer), since a prefetch with no active lanes is a no-op and can be erased.
---
Full diff: https://github.com/llvm/llvm-project/pull/103052.diff
2 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp (+18)
- (added) llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-prf.ll (+156)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 49b462668b6306..b430e738110b83 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -2157,6 +2157,24 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
case Intrinsic::aarch64_sve_ldnt1_gather_scalar_offset:
case Intrinsic::aarch64_sve_ldnt1_gather_uxtw:
return instCombineSVENoActiveUnaryZero(IC, II);
+ case Intrinsic::aarch64_sve_prf:
+ case Intrinsic::aarch64_sve_prfb_gather_index:
+ case Intrinsic::aarch64_sve_prfb_gather_scalar_offset:
+ case Intrinsic::aarch64_sve_prfb_gather_sxtw_index:
+ case Intrinsic::aarch64_sve_prfb_gather_uxtw_index:
+ case Intrinsic::aarch64_sve_prfd_gather_index:
+ case Intrinsic::aarch64_sve_prfd_gather_scalar_offset:
+ case Intrinsic::aarch64_sve_prfd_gather_sxtw_index:
+ case Intrinsic::aarch64_sve_prfd_gather_uxtw_index:
+ case Intrinsic::aarch64_sve_prfh_gather_index:
+ case Intrinsic::aarch64_sve_prfh_gather_scalar_offset:
+ case Intrinsic::aarch64_sve_prfh_gather_sxtw_index:
+ case Intrinsic::aarch64_sve_prfh_gather_uxtw_index:
+ case Intrinsic::aarch64_sve_prfw_gather_index:
+ case Intrinsic::aarch64_sve_prfw_gather_scalar_offset:
+ case Intrinsic::aarch64_sve_prfw_gather_sxtw_index:
+ case Intrinsic::aarch64_sve_prfw_gather_uxtw_index:
+ return instCombineSVENoActiveUnaryErase(IC, II, 0);
case Intrinsic::aarch64_neon_fmaxnm:
case Intrinsic::aarch64_neon_fminnm:
return instCombineMaxMinNM(IC, II);
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-prf.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-prf.ll
new file mode 100644
index 00000000000000..1de582dff52dad
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-prf.ll
@@ -0,0 +1,156 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=instcombine < %s | FileCheck %s
+target triple = "aarch64-unknown-linux-gnu"
+
+define void @test_prf(ptr %base){
+; CHECK-LABEL: define void @test_prf(
+; CHECK-SAME: ptr [[BASE:%.*]]) {
+; CHECK-NEXT: ret void
+;
+ tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> zeroinitializer, ptr %base, i32 1)
+ ret void
+}
+
+define void @test_prfb_gather_index(ptr %base, <vscale x 2 x i64> %indexes){
+; CHECK-LABEL: define void @test_prfb_gather_index(
+; CHECK-SAME: ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDEXES:%.*]]) {
+; CHECK-NEXT: ret void
+;
+ call void @llvm.aarch64.sve.prfb.gather.index.nxv2i64(<vscale x 2 x i1> zeroinitializer, ptr %base, <vscale x 2 x i64> %indexes, i32 1)
+ ret void
+}
+
+define void @test_prfb_gather_scalar_offset(<vscale x 4 x i32> %bases){
+; CHECK-LABEL: define void @test_prfb_gather_scalar_offset(
+; CHECK-SAME: <vscale x 4 x i32> [[BASES:%.*]]) {
+; CHECK-NEXT: ret void
+;
+ call void @llvm.aarch64.sve.prfb.gather.scalar.offset.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %bases, i64 7, i32 1)
+ ret void
+}
+
+define void @test_prfb_gather_sxtw_index(ptr %base, <vscale x 4 x i32> %indexes){
+; CHECK-LABEL: define void @test_prfb_gather_sxtw_index(
+; CHECK-SAME: ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDEXES:%.*]]) {
+; CHECK-NEXT: ret void
+;
+ call void @llvm.aarch64.sve.prfb.gather.sxtw.index.nxv4i32(<vscale x 4 x i1> zeroinitializer, ptr %base, <vscale x 4 x i32> %indexes, i32 1)
+ ret void
+}
+
+define void @test_prfb_gather_uxtw_index(ptr %base, <vscale x 4 x i32> %indexes){
+; CHECK-LABEL: define void @test_prfb_gather_uxtw_index(
+; CHECK-SAME: ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDEXES:%.*]]) {
+; CHECK-NEXT: ret void
+;
+ call void @llvm.aarch64.sve.prfb.gather.uxtw.index.nxv4i32(<vscale x 4 x i1> zeroinitializer, ptr %base, <vscale x 4 x i32> %indexes, i32 1)
+ ret void
+}
+
+define void @test_prfd_gather_index(ptr %base, <vscale x 2 x i64> %indexes){
+; CHECK-LABEL: define void @test_prfd_gather_index(
+; CHECK-SAME: ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDEXES:%.*]]) {
+; CHECK-NEXT: ret void
+;
+ call void @llvm.aarch64.sve.prfd.gather.index.nxv2i64(<vscale x 2 x i1> zeroinitializer, ptr %base, <vscale x 2 x i64> %indexes, i32 1)
+ ret void
+}
+
+define void @test_prfd_gather_scalar_offset(<vscale x 4 x i32> %bases){
+; CHECK-LABEL: define void @test_prfd_gather_scalar_offset(
+; CHECK-SAME: <vscale x 4 x i32> [[BASES:%.*]]) {
+; CHECK-NEXT: ret void
+;
+ call void @llvm.aarch64.sve.prfd.gather.scalar.offset.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %bases, i64 7, i32 1)
+ ret void
+}
+
+define void @test_prfd_gather_sxtw_index(ptr %base, <vscale x 4 x i32> %indexes){
+; CHECK-LABEL: define void @test_prfd_gather_sxtw_index(
+; CHECK-SAME: ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDEXES:%.*]]) {
+; CHECK-NEXT: ret void
+;
+ call void @llvm.aarch64.sve.prfd.gather.sxtw.index.nxv4i32(<vscale x 4 x i1> zeroinitializer, ptr %base, <vscale x 4 x i32> %indexes, i32 1)
+ ret void
+}
+
+define void @test_prfd_gather_uxtw_index(ptr %base, <vscale x 4 x i32> %indexes){
+; CHECK-LABEL: define void @test_prfd_gather_uxtw_index(
+; CHECK-SAME: ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDEXES:%.*]]) {
+; CHECK-NEXT: ret void
+;
+ call void @llvm.aarch64.sve.prfd.gather.uxtw.index.nxv4i32(<vscale x 4 x i1> zeroinitializer, ptr %base, <vscale x 4 x i32> %indexes, i32 1)
+ ret void
+}
+
+define void @test_prfh_gather_index(ptr %base, <vscale x 2 x i64> %indexes){
+; CHECK-LABEL: define void @test_prfh_gather_index(
+; CHECK-SAME: ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDEXES:%.*]]) {
+; CHECK-NEXT: ret void
+;
+ call void @llvm.aarch64.sve.prfh.gather.index.nxv2i64(<vscale x 2 x i1> zeroinitializer, ptr %base, <vscale x 2 x i64> %indexes, i32 1)
+ ret void
+}
+
+define void @test_prfh_gather_scalar_offset(<vscale x 4 x i32> %bases){
+; CHECK-LABEL: define void @test_prfh_gather_scalar_offset(
+; CHECK-SAME: <vscale x 4 x i32> [[BASES:%.*]]) {
+; CHECK-NEXT: ret void
+;
+ call void @llvm.aarch64.sve.prfh.gather.scalar.offset.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %bases, i64 7, i32 1)
+ ret void
+}
+
+define void @test_prfh_gather_sxtw_index(ptr %base, <vscale x 4 x i32> %indexes){
+; CHECK-LABEL: define void @test_prfh_gather_sxtw_index(
+; CHECK-SAME: ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDEXES:%.*]]) {
+; CHECK-NEXT: ret void
+;
+ call void @llvm.aarch64.sve.prfh.gather.sxtw.index.nxv4i32(<vscale x 4 x i1> zeroinitializer, ptr %base, <vscale x 4 x i32> %indexes, i32 1)
+ ret void
+}
+
+define void @test_prfh_gather_uxtw_index(ptr %base, <vscale x 4 x i32> %indexes){
+; CHECK-LABEL: define void @test_prfh_gather_uxtw_index(
+; CHECK-SAME: ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDEXES:%.*]]) {
+; CHECK-NEXT: ret void
+;
+ call void @llvm.aarch64.sve.prfh.gather.uxtw.index.nxv4i32(<vscale x 4 x i1> zeroinitializer, ptr %base, <vscale x 4 x i32> %indexes, i32 1)
+ ret void
+}
+
+define void @test_prfw_gather_index(ptr %base, <vscale x 2 x i64> %indexes){
+; CHECK-LABEL: define void @test_prfw_gather_index(
+; CHECK-SAME: ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDEXES:%.*]]) {
+; CHECK-NEXT: ret void
+;
+ call void @llvm.aarch64.sve.prfw.gather.index.nxv2i64(<vscale x 2 x i1> zeroinitializer, ptr %base, <vscale x 2 x i64> %indexes, i32 1)
+ ret void
+}
+
+define void @test_prfw_gather_scalar_offset(<vscale x 4 x i32> %bases){
+; CHECK-LABEL: define void @test_prfw_gather_scalar_offset(
+; CHECK-SAME: <vscale x 4 x i32> [[BASES:%.*]]) {
+; CHECK-NEXT: ret void
+;
+ call void @llvm.aarch64.sve.prfw.gather.scalar.offset.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %bases, i64 7, i32 1)
+ ret void
+}
+
+define void @test_prfw_gather_sxtw_index(ptr %base, <vscale x 4 x i32> %indexes){
+; CHECK-LABEL: define void @test_prfw_gather_sxtw_index(
+; CHECK-SAME: ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDEXES:%.*]]) {
+; CHECK-NEXT: ret void
+;
+ call void @llvm.aarch64.sve.prfw.gather.sxtw.index.nxv4i32(<vscale x 4 x i1> zeroinitializer, ptr %base, <vscale x 4 x i32> %indexes, i32 1)
+ ret void
+}
+
+define void @test_prfw_gather_uxtw_index(ptr %base, <vscale x 4 x i32> %indexes){
+; CHECK-LABEL: define void @test_prfw_gather_uxtw_index(
+; CHECK-SAME: ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDEXES:%.*]]) {
+; CHECK-NEXT: ret void
+;
+ call void @llvm.aarch64.sve.prfw.gather.uxtw.index.nxv4i32(<vscale x 4 x i1> zeroinitializer, ptr %base, <vscale x 4 x i32> %indexes, i32 1)
+ ret void
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/103052
More information about the llvm-commits
mailing list