[llvm] 1a26659 - [AArch64][SVE] Improve codegen when extracting first lane of active lane mask

Mon May 9 06:02:09 PDT 2022

Author: Rosie Sumpter
Date: 2022-05-09T13:56:04+01:00
New Revision: 1a2665902f128155fa1febafea990ebaee9476f2

URL: https://github.com/llvm/llvm-project/commit/1a2665902f128155fa1febafea990ebaee9476f2
DIFF: https://github.com/llvm/llvm-project/commit/1a2665902f128155fa1febafea990ebaee9476f2.diff

LOG: [AArch64][SVE] Improve codegen when extracting first lane of active lane mask

When extracting the first lane of a predicate created using the
llvm.get.active.lane.mask intrinsic, it should give the same codegen as
when the predicate is created using the llvm.aarch64.sve.whilelo
intrinsic, since get.active.lane.mask is lowered to whilelo. This patch
ensures the codegen is the same by recognizing
llvm.get.active.lane.mask as a flag-setting operation in this case.

Differential Revision: https://reviews.llvm.org/D125215

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/test/CodeGen/AArch64/sve-cmp-folds.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index dc99ed0b4066..31874304001d 100644

--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -14666,7 +14666,9 @@ static bool isPredicateCCSettingOp(SDValue N) {
         N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilele ||
         N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelo ||
         N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilels ||
-        N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelt)))
+        N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelt ||
+        // get_active_lane_mask is lowered to a whilelo instruction.
+        N.getConstantOperandVal(0) == Intrinsic::get_active_lane_mask)))
     return true;
 
   return false;

diff  --git a/llvm/test/CodeGen/AArch64/sve-cmp-folds.ll b/llvm/test/CodeGen/AArch64/sve-cmp-folds.ll
index 380501c141fa..6812f0bdb588 100644
--- a/llvm/test/CodeGen/AArch64/sve-cmp-folds.ll
+++ b/llvm/test/CodeGen/AArch64/sve-cmp-folds.ll
@@ -170,6 +170,17 @@ define i1 @whilelt_first(i64 %next, i64 %end) {
   ret i1 %bit
 }
 
+define i1 @lane_mask_first(i64 %next, i64 %end) {
+; CHECK-LABEL: lane_mask_first:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilelo p0.s, x0, x1
+; CHECK-NEXT:    cset w0, mi
+; CHECK-NEXT:    ret
+  %predicate = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 %next, i64 %end)
+  %bit = extractelement <vscale x 4 x i1> %predicate, i64 0
+  ret i1 %bit
+}
+
 declare i64 @llvm.vscale.i64()
 declare <vscale x 4 x i1> @llvm.aarch64.sve.whilege.nxv4i1.i64(i64, i64)
 declare <vscale x 4 x i1> @llvm.aarch64.sve.whilegt.nxv4i1.i64(i64, i64)
@@ -179,3 +190,4 @@ declare <vscale x 4 x i1> @llvm.aarch64.sve.whilele.nxv4i1.i64(i64, i64)
 declare <vscale x 4 x i1> @llvm.aarch64.sve.whilelo.nxv4i1.i64(i64, i64)
 declare <vscale x 4 x i1> @llvm.aarch64.sve.whilels.nxv4i1.i64(i64, i64)
 declare <vscale x 4 x i1> @llvm.aarch64.sve.whilelt.nxv4i1.i64(i64, i64)
+declare <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64, i64)