[llvm-branch-commits] [llvm] 3122c66 - [AArch64][SVE] Remove chains of unnecessary SVE reinterpret intrinsics
Joe Ellis via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Jan 13 01:49:34 PST 2021
Author: Joe Ellis
Date: 2021-01-13T09:44:09Z
New Revision: 3122c66aee7b709046753873c4e94db73742b3de
URL: https://github.com/llvm/llvm-project/commit/3122c66aee7b709046753873c4e94db73742b3de
DIFF: https://github.com/llvm/llvm-project/commit/3122c66aee7b709046753873c4e94db73742b3de.diff
LOG: [AArch64][SVE] Remove chains of unnecessary SVE reinterpret intrinsics
This commit extends SVEIntrinsicOpts::optimizeConvertFromSVBool to
identify and remove longer chains of redundant SVE reintepret
intrinsics. For example, the following chain of redundant SVE
reinterprets is now recognised as redundant:
%a = <vscale x 2 x i1>
%1 = <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool(<vscale x 2 x i1> %a)
%2 = <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool(<vscale x 16 x i1> %1)
%3 = <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool(<vscale x 4 x i1> %2)
%4 = <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool(<vscale x 16 x i1> %3)
%5 = <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool(<vscale x 4 x i1> %4)
%6 = <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool(<vscale x 16 x i1> %5)
ret <vscale x 2 x i1> %6
and will be replaced with:
ret <vscale x 2 x i1> %a
Eliminating these can sometimes mean emitting fewer unnecessary
loads/stores when lowering to assembly.
Differential Revision: https://reviews.llvm.org/D94074
Added:
Modified:
llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp
llvm/test/CodeGen/AArch64/sve-intrinsic-opts-reinterpret.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp b/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp
index 8e8b12c07bbf..9911f33371c6 100644
--- a/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp
+++ b/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp
@@ -177,22 +177,50 @@ bool SVEIntrinsicOpts::optimizeConvertFromSVBool(IntrinsicInst *I) {
if (isa<PHINode>(I->getArgOperand(0)))
return processPhiNode(I);
- // If we have a reinterpret intrinsic I of type A which is converting from
- // another reinterpret Y of type B, and the source type of Y is A, then we can
- // elide away both reinterprets if there are no other users of Y.
- auto *Y = isReinterpretToSVBool(I->getArgOperand(0));
- if (!Y)
- return false;
+ SmallVector<Instruction *, 32> CandidatesForRemoval;
+ Value *Cursor = I->getOperand(0), *EarliestReplacement = nullptr;
+
+ const auto *IVTy = cast<VectorType>(I->getType());
+
+ // Walk the chain of conversions.
+ while (Cursor) {
+ // If the type of the cursor has fewer lanes than the final result, zeroing
+ // must take place, which breaks the equivalence chain.
+ const auto *CursorVTy = cast<VectorType>(Cursor->getType());
+ if (CursorVTy->getElementCount().getKnownMinValue() <
+ IVTy->getElementCount().getKnownMinValue())
+ break;
+
+ // If the cursor has the same type as I, it is a viable replacement.
+ if (Cursor->getType() == IVTy)
+ EarliestReplacement = Cursor;
- Value *SourceVal = Y->getArgOperand(0);
- if (I->getType() != SourceVal->getType())
+ auto *IntrinsicCursor = dyn_cast<IntrinsicInst>(Cursor);
+
+ // If this is not an SVE conversion intrinsic, this is the end of the chain.
+ if (!IntrinsicCursor || !(IntrinsicCursor->getIntrinsicID() ==
+ Intrinsic::aarch64_sve_convert_to_svbool ||
+ IntrinsicCursor->getIntrinsicID() ==
+ Intrinsic::aarch64_sve_convert_from_svbool))
+ break;
+
+ CandidatesForRemoval.insert(CandidatesForRemoval.begin(), IntrinsicCursor);
+ Cursor = IntrinsicCursor->getOperand(0);
+ }
+
+ // If no viable replacement in the conversion chain was found, there is
+ // nothing to do.
+ if (!EarliestReplacement)
return false;
- I->replaceAllUsesWith(SourceVal);
+ I->replaceAllUsesWith(EarliestReplacement);
I->eraseFromParent();
- if (Y->use_empty())
- Y->eraseFromParent();
+ while (!CandidatesForRemoval.empty()) {
+ Instruction *Candidate = CandidatesForRemoval.pop_back_val();
+ if (Candidate->use_empty())
+ Candidate->eraseFromParent();
+ }
return true;
}
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsic-opts-reinterpret.ll b/llvm/test/CodeGen/AArch64/sve-intrinsic-opts-reinterpret.ll
index 47e0ff8f19c7..22c61d0565af 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsic-opts-reinterpret.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsic-opts-reinterpret.ll
@@ -67,6 +67,62 @@ define <vscale x 16 x i1> @reinterpret_test_d_rev(<vscale x 16 x i1> %a) {
ret <vscale x 16 x i1> %2
}
+define <vscale x 2 x i1> @reinterpret_test_full_chain(<vscale x 2 x i1> %a) {
+; OPT-LABEL: @reinterpret_test_full_chain(
+; OPT: ret <vscale x 2 x i1> %a
+ %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %a)
+ %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
+ %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
+ %4 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %3)
+ %5 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %4)
+ %6 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %5)
+ ret <vscale x 2 x i1> %6
+}
+
+; The last two reinterprets are not necessary, since they are doing the same
+; work as the first two.
+define <vscale x 4 x i1> @reinterpret_test_partial_chain(<vscale x 2 x i1> %a) {
+; OPT-LABEL: @reinterpret_test_partial_chain(
+; OPT: %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %a)
+; OPT-NEXT: %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
+; OPT-NEXT: ret <vscale x 4 x i1> %2
+ %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %a)
+ %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
+ %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
+ %4 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %3)
+ ret <vscale x 4 x i1> %4
+}
+
+; The chain cannot be reduced because of the second reinterpret, which causes
+; zeroing.
+define <vscale x 8 x i1> @reinterpret_test_irreducible_chain(<vscale x 8 x i1> %a) {
+; OPT-LABEL: @reinterpret_test_irreducible_chain(
+; OPT: %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %a)
+; OPT-NEXT: %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
+; OPT-NEXT: %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
+; OPT-NEXT: %4 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %3)
+; OPT-NEXT: ret <vscale x 8 x i1> %4
+ %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %a)
+ %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
+ %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
+ %4 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %3)
+ ret <vscale x 8 x i1> %4
+}
+
+; Here, the candidate list is larger than the number of instructions that we
+; end up removing.
+define <vscale x 4 x i1> @reinterpret_test_keep_some_candidates(<vscale x 8 x i1> %a) {
+; OPT-LABEL: @reinterpret_test_keep_some_candidates(
+; OPT: %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %a)
+; OPT-NEXT: %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
+; OPT-NEXT: ret <vscale x 4 x i1> %2
+ %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %a)
+ %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
+ %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
+ %4 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %3)
+ ret <vscale x 4 x i1> %4
+}
+
define <vscale x 2 x i1> @reinterpret_reductions(i32 %cond, <vscale x 2 x i1> %a, <vscale x 2 x i1> %b, <vscale x 2 x i1> %c) {
; OPT-LABEL: reinterpret_reductions
; OPT-NOT: convert
More information about the llvm-branch-commits
mailing list