[llvm] 2d574a1 - [CodeGen][AArch64][SVE] Canonicalize intrinsic rdffr{ => _z}
Peter Waller via llvm-commits
llvm-commits at lists.llvm.org
Thu May 20 09:23:27 PDT 2021
Author: Peter Waller
Date: 2021-05-20T16:22:50Z
New Revision: 2d574a110440597eefe1b2a8b6144e4e89c21d05
URL: https://github.com/llvm/llvm-project/commit/2d574a110440597eefe1b2a8b6144e4e89c21d05
DIFF: https://github.com/llvm/llvm-project/commit/2d574a110440597eefe1b2a8b6144e4e89c21d05.diff
LOG: [CodeGen][AArch64][SVE] Canonicalize intrinsic rdffr{ => _z}
Follow-up to D101357 / 3fa6510f6.
Supersedes D102330.
Goal: Use flags-setting rdffrs instead of rdffr + ptest.
Problem: RDFFR_P doesn't have a flags-setting equivalent.
Solution: In instcombine, canonicalize to RDFFR_PP at the IR level, and
rely on the RDFFR_PP+PTEST => RDFFRS_PP optimization in
AArch64InstrInfo::optimizePTestInstr.
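For illustration, a sketch of the IR-level canonicalization (value names are
illustrative; the form matches the added instcombine test, where i32 31 is
AArch64SVEPredPattern::all):

  ; before
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.rdffr()

  ; after
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.rdffr.z(<vscale x 16 x i1> %pg)

A subsequent ptest of the result can then be folded into a flags-setting rdffrs
by AArch64InstrInfo::optimizePTestInstr, as the added rdffr_z_ptest codegen test
demonstrates.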
While here:
* Test that rdffr.z+ptest generates a rdffrs.
* Use update_{test,llc}_checks.py on the tests.
* Use sve attribute on functions.
Differential Revision: https://reviews.llvm.org/D102623
Added:
llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-rdffr-predication.ll
Modified:
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rdffr.c
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
llvm/test/CodeGen/AArch64/sve-intrinsics-ffr-manipulation.ll
Removed:
################################################################################
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rdffr.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rdffr.c
index 9b871ee3a8dc..a85ac7bb5cef 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rdffr.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rdffr.c
@@ -7,7 +7,8 @@
svbool_t test_svrdffr()
{
// CHECK-LABEL: test_svrdffr
- // CHECK: %[[INTRINSIC:.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.rdffr()
+ // CHECK: %[[INTRINSIC:.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.rdffr.z(<vscale x 16 x i1>
+ // CHECK-NOT: rdffr
// CHECK: ret <vscale x 16 x i1> %[[INTRINSIC]]
return svrdffr();
}
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 90762052dc3a..846c07863467 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -470,6 +470,23 @@ static Optional<Instruction *> instCombineSVELast(InstCombiner &IC,
return IC.replaceInstUsesWith(II, Extract);
}
+static Optional<Instruction *> instCombineRDFFR(InstCombiner &IC,
+ IntrinsicInst &II) {
+ LLVMContext &Ctx = II.getContext();
+ IRBuilder<> Builder(Ctx);
+ Builder.SetInsertPoint(&II);
+ // Replace rdffr with predicated rdffr.z intrinsic, so that optimizePTestInstr
+ // can work with RDFFR_PP for ptest elimination.
+ auto *AllPat =
+ ConstantInt::get(Type::getInt32Ty(Ctx), AArch64SVEPredPattern::all);
+ auto *PTrue = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ptrue,
+ {II.getType()}, {AllPat});
+ auto *RDFFR =
+ Builder.CreateIntrinsic(Intrinsic::aarch64_sve_rdffr_z, {}, {PTrue});
+ RDFFR->takeName(&II);
+ return IC.replaceInstUsesWith(II, RDFFR);
+}
+
Optional<Instruction *>
AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
IntrinsicInst &II) const {
@@ -481,6 +498,8 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
return instCombineConvertFromSVBool(IC, II);
case Intrinsic::aarch64_sve_dup:
return instCombineSVEDup(IC, II);
+ case Intrinsic::aarch64_sve_rdffr:
+ return instCombineRDFFR(IC, II);
case Intrinsic::aarch64_sve_lasta:
case Intrinsic::aarch64_sve_lastb:
return instCombineSVELast(IC, II);
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-ffr-manipulation.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-ffr-manipulation.ll
index 7460037078d1..bc07c972e5fb 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-ffr-manipulation.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-ffr-manipulation.ll
@@ -1,33 +1,51 @@
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
;
; RDFFR
;
-define <vscale x 16 x i1> @rdffr() {
+define <vscale x 16 x i1> @rdffr() #0 {
; CHECK-LABEL: rdffr:
-; CHECK: rdffr p0.b
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdffr p0.b
+; CHECK-NEXT: ret
%out = call <vscale x 16 x i1> @llvm.aarch64.sve.rdffr()
ret <vscale x 16 x i1> %out
}
-define <vscale x 16 x i1> @rdffr_z(<vscale x 16 x i1> %pg) {
+define <vscale x 16 x i1> @rdffr_z(<vscale x 16 x i1> %pg) #0 {
; CHECK-LABEL: rdffr_z:
-; CHECK: rdffr p0.b, p0/z
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdffr p0.b, p0/z
+; CHECK-NEXT: ret
%out = call <vscale x 16 x i1> @llvm.aarch64.sve.rdffr.z(<vscale x 16 x i1> %pg)
ret <vscale x 16 x i1> %out
}
+; Test that rdffr.z followed by ptest optimizes to flags-setting rdffrs.
+define i1 @rdffr_z_ptest(<vscale x 16 x i1> %pg) #0 {
+; CHECK-LABEL: rdffr_z_ptest:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdffrs p0.b, p0/z
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %rdffr = call <vscale x 16 x i1> @llvm.aarch64.sve.rdffr.z(<vscale x 16 x i1> %pg)
+ %out = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %rdffr)
+ ret i1 %out
+}
+
;
; SETFFR
;
-define void @set_ffr() {
+define void @set_ffr() #0 {
; CHECK-LABEL: set_ffr:
-; CHECK: setffr
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: setffr
+; CHECK-NEXT: ret
call void @llvm.aarch64.sve.setffr()
ret void
}
@@ -36,10 +54,11 @@ define void @set_ffr() {
; WRFFR
;
-define void @wrffr(<vscale x 16 x i1> %a) {
+define void @wrffr(<vscale x 16 x i1> %a) #0 {
; CHECK-LABEL: wrffr:
-; CHECK: wrffr p0.b
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: wrffr p0.b
+; CHECK-NEXT: ret
call void @llvm.aarch64.sve.wrffr(<vscale x 16 x i1> %a)
ret void
}
@@ -48,3 +67,7 @@ declare <vscale x 16 x i1> @llvm.aarch64.sve.rdffr()
declare <vscale x 16 x i1> @llvm.aarch64.sve.rdffr.z(<vscale x 16 x i1>)
declare void @llvm.aarch64.sve.setffr()
declare void @llvm.aarch64.sve.wrffr(<vscale x 16 x i1>)
+
+declare i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
+
+attributes #0 = { "target-features"="+sve" }
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-rdffr-predication.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-rdffr-predication.ll
new file mode 100644
index 000000000000..4360d99bd03c
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-rdffr-predication.ll
@@ -0,0 +1,19 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+; Test that rdffr is substituted with predicated form which enables ptest optimization later.
+define <vscale x 16 x i1> @predicate_rdffr() #0 {
+; CHECK-LABEL: @predicate_rdffr(
+; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.rdffr.z(<vscale x 16 x i1> [[TMP1]])
+; CHECK-NEXT: ret <vscale x 16 x i1> [[OUT]]
+;
+ %out = call <vscale x 16 x i1> @llvm.aarch64.sve.rdffr()
+ ret <vscale x 16 x i1> %out
+}
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.rdffr()
+
+attributes #0 = { "target-features"="+sve" }