[llvm] 1a26659 - [AArch64][SVE] Improve codegen when extracting first lane of active lane mask
Rosie Sumpter via llvm-commits
llvm-commits at lists.llvm.org
Mon May 9 06:02:09 PDT 2022
Author: Rosie Sumpter
Date: 2022-05-09T13:56:04+01:00
New Revision: 1a2665902f128155fa1febafea990ebaee9476f2
URL: https://github.com/llvm/llvm-project/commit/1a2665902f128155fa1febafea990ebaee9476f2
DIFF: https://github.com/llvm/llvm-project/commit/1a2665902f128155fa1febafea990ebaee9476f2.diff
LOG: [AArch64][SVE] Improve codegen when extracting first lane of active lane mask
When extracting the first lane of a predicate created using the
llvm.get.active.lane.mask intrinsic, it should give the same codegen as
when the predicate is created using the llvm.aarch64.sve.whilelo
intrinsic, since get.active.lane.mask is lowered to whilelo. This patch
ensures the codegen is the same by recognizing
llvm.get.active.lane.mask as a flag-setting operation in this case.
Differential Revision: https://reviews.llvm.org/D125215
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/sve-cmp-folds.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index dc99ed0b4066..31874304001d 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -14666,7 +14666,9 @@ static bool isPredicateCCSettingOp(SDValue N) {
N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilele ||
N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelo ||
N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilels ||
- N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelt)))
+ N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelt ||
+ // get_active_lane_mask is lowered to a whilelo instruction.
+ N.getConstantOperandVal(0) == Intrinsic::get_active_lane_mask)))
return true;
return false;
diff --git a/llvm/test/CodeGen/AArch64/sve-cmp-folds.ll b/llvm/test/CodeGen/AArch64/sve-cmp-folds.ll
index 380501c141fa..6812f0bdb588 100644
--- a/llvm/test/CodeGen/AArch64/sve-cmp-folds.ll
+++ b/llvm/test/CodeGen/AArch64/sve-cmp-folds.ll
@@ -170,6 +170,17 @@ define i1 @whilelt_first(i64 %next, i64 %end) {
ret i1 %bit
}
+define i1 @lane_mask_first(i64 %next, i64 %end) {
+; CHECK-LABEL: lane_mask_first:
+; CHECK: // %bb.0:
+; CHECK-NEXT: whilelo p0.s, x0, x1
+; CHECK-NEXT: cset w0, mi
+; CHECK-NEXT: ret
+ %predicate = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 %next, i64 %end)
+ %bit = extractelement <vscale x 4 x i1> %predicate, i64 0
+ ret i1 %bit
+}
+
declare i64 @llvm.vscale.i64()
declare <vscale x 4 x i1> @llvm.aarch64.sve.whilege.nxv4i1.i64(i64, i64)
declare <vscale x 4 x i1> @llvm.aarch64.sve.whilegt.nxv4i1.i64(i64, i64)
@@ -179,3 +190,4 @@ declare <vscale x 4 x i1> @llvm.aarch64.sve.whilele.nxv4i1.i64(i64, i64)
declare <vscale x 4 x i1> @llvm.aarch64.sve.whilelo.nxv4i1.i64(i64, i64)
declare <vscale x 4 x i1> @llvm.aarch64.sve.whilels.nxv4i1.i64(i64, i64)
declare <vscale x 4 x i1> @llvm.aarch64.sve.whilelt.nxv4i1.i64(i64, i64)
+declare <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64, i64)
More information about the llvm-commits mailing list