[llvm] [AArch64][InstCombine] Canonicalize whilelo intrinsic (PR #151553)
Matthew Devereau via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 31 09:18:09 PDT 2025
https://github.com/MDevereau created https://github.com/llvm/llvm-project/pull/151553
Canonicalize llvm.aarch64.sve.whilelo to the generic LLVM llvm.get.active.lane.mask intrinsic in InstCombine.
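To make the intended rewrite concrete, here is a minimal before/after sketch on IR (the %base and %n names are illustrative, not taken from the patch):

  %mask = call <vscale x 4 x i1> @llvm.aarch64.sve.whilelo.nxv4i1.i32(i32 %base, i32 %n)
  ; becomes
  %mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 %base, i32 %n)

Both intrinsics produce a predicate whose lane i is active when %base + i compares unsigned-less-than %n, so the swrite is intended to be semantics-preserving while letting target-independent passes reason about the mask; the AArch64 backend already lowers the generic intrinsic to a whilelo instruction when SVE is available, so codegen should be unaffected.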
From f4711c667c5534f1144794079c0995998ab6b856 Mon Sep 17 00:00:00 2001
From: Matthew Devereau <matthew.devereau at arm.com>
Date: Wed, 23 Jul 2025 02:15:52 +0000
Subject: [PATCH] [AArch64][InstCombine] Canonicalize whilelo intrinsic
Canonicalize llvm.aarch64.sve.whilelo to the generic LLVM
llvm.get.active.lane.mask intrinsic in InstCombine
---
.../AArch64/AArch64TargetTransformInfo.cpp | 11 ++++
.../AArch64/sve-intrinsic-whilelo.ll | 66 +++++++++++++++++++
2 files changed, 77 insertions(+)
create mode 100644 llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-whilelo.ll
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 18ca22fc9f211..1220a0fc8ee82 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -2696,6 +2696,15 @@ static std::optional<Instruction *> instCombineDMB(InstCombiner &IC,
return std::nullopt;
}
+static std::optional<Instruction *> instCombineWhilelo(InstCombiner &IC,
+ IntrinsicInst &II) {
+ return IC.replaceInstUsesWith(
+ II,
+ IC.Builder.CreateIntrinsic(Intrinsic::get_active_lane_mask,
+ {II.getType(), II.getOperand(0)->getType()},
+ {II.getOperand(0), II.getOperand(1)}));
+}
+
static std::optional<Instruction *> instCombinePTrue(InstCombiner &IC,
IntrinsicInst &II) {
if (match(II.getOperand(0), m_ConstantInt<AArch64SVEPredPattern::all>()))
@@ -2830,6 +2839,8 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
return instCombineSVEDupqLane(IC, II);
case Intrinsic::aarch64_sve_insr:
return instCombineSVEInsr(IC, II);
+ case Intrinsic::aarch64_sve_whilelo:
+ return instCombineWhilelo(IC, II);
case Intrinsic::aarch64_sve_ptrue:
return instCombinePTrue(IC, II);
case Intrinsic::aarch64_sve_uxtb:
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-whilelo.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-whilelo.ll
new file mode 100644
index 0000000000000..9dde171217432
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-whilelo.ll
@@ -0,0 +1,66 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=instcombine < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+
+define <vscale x 4 x float> @const_whilelo_nxv4f32(ptr %0) {
+; CHECK-LABEL: define <vscale x 4 x float> @const_whilelo_nxv4f32(
+; CHECK-SAME: ptr [[TMP0:%.*]]) {
+; CHECK-NEXT: [[MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 4)
+; CHECK-NEXT: [[LOAD:%.*]] = tail call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[TMP0]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer)
+; CHECK-NEXT: ret <vscale x 4 x float> [[LOAD]]
+;
+ %mask = tail call <vscale x 4 x i1> @llvm.aarch64.sve.whilelo.nxv4i1.i32(i32 0, i32 4)
+ %load = tail call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull %0, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x float> zeroinitializer)
+ ret <vscale x 4 x float> %load
+}
+
+define <vscale x 8 x float> @const_whilelo_nxv8f32(ptr %0) {
+; CHECK-LABEL: define <vscale x 8 x float> @const_whilelo_nxv8f32(
+; CHECK-SAME: ptr [[TMP0:%.*]]) {
+; CHECK-NEXT: [[MASK:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i32(i32 0, i32 8)
+; CHECK-NEXT: [[LOAD:%.*]] = tail call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr nonnull [[TMP0]], i32 1, <vscale x 8 x i1> [[MASK]], <vscale x 8 x float> zeroinitializer)
+; CHECK-NEXT: ret <vscale x 8 x float> [[LOAD]]
+;
+ %mask = tail call <vscale x 8 x i1> @llvm.aarch64.sve.whilelo.nxv8i1.i32(i32 0, i32 8)
+ %load = tail call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr nonnull %0, i32 1, <vscale x 8 x i1> %mask, <vscale x 8 x float> zeroinitializer)
+ ret <vscale x 8 x float> %load
+}
+
+define <vscale x 8 x i16> @const_whilelo_nxv8i16(ptr %0) {
+; CHECK-LABEL: define <vscale x 8 x i16> @const_whilelo_nxv8i16(
+; CHECK-SAME: ptr [[TMP0:%.*]]) {
+; CHECK-NEXT: [[MASK:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i32(i32 0, i32 8)
+; CHECK-NEXT: [[LOAD:%.*]] = tail call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr nonnull [[TMP0]], i32 1, <vscale x 8 x i1> [[MASK]], <vscale x 8 x i16> zeroinitializer)
+; CHECK-NEXT: ret <vscale x 8 x i16> [[LOAD]]
+;
+ %mask = tail call <vscale x 8 x i1> @llvm.aarch64.sve.whilelo.nxv8i1.i32(i32 0, i32 8)
+ %load = tail call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr nonnull %0, i32 1, <vscale x 8 x i1> %mask, <vscale x 8 x i16> zeroinitializer)
+ ret <vscale x 8 x i16> %load
+}
+
+define <vscale x 16 x i8> @const_whilelo_nxv16i8(ptr %0) {
+; CHECK-LABEL: define <vscale x 16 x i8> @const_whilelo_nxv16i8(
+; CHECK-SAME: ptr [[TMP0:%.*]]) {
+; CHECK-NEXT: [[MASK:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 0, i32 16)
+; CHECK-NEXT: [[LOAD:%.*]] = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr nonnull [[TMP0]], i32 1, <vscale x 16 x i1> [[MASK]], <vscale x 16 x i8> zeroinitializer)
+; CHECK-NEXT: ret <vscale x 16 x i8> [[LOAD]]
+;
+ %mask = tail call <vscale x 16 x i1> @llvm.aarch64.sve.whilelo.nxv16i1.i32(i32 0, i32 16)
+ %load = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr nonnull %0, i32 1, <vscale x 16 x i1> %mask, <vscale x 16 x i8> zeroinitializer)
+ ret <vscale x 16 x i8> %load
+}
+
+
+define <vscale x 16 x i8> @whilelo_nxv16i8(ptr %0, i32 %a, i32 %b) {
+; CHECK-LABEL: define <vscale x 16 x i8> @whilelo_nxv16i8(
+; CHECK-SAME: ptr [[TMP0:%.*]], i32 [[A:%.*]], i32 [[B:%.*]]) {
+; CHECK-NEXT: [[MASK:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 [[A]], i32 [[B]])
+; CHECK-NEXT: [[LOAD:%.*]] = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr nonnull [[TMP0]], i32 1, <vscale x 16 x i1> [[MASK]], <vscale x 16 x i8> zeroinitializer)
+; CHECK-NEXT: ret <vscale x 16 x i8> [[LOAD]]
+;
+ %mask = tail call <vscale x 16 x i1> @llvm.aarch64.sve.whilelo.nxv16i1.i32(i32 %a, i32 %b)
+ %load = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr nonnull %0, i32 1, <vscale x 16 x i1> %mask, <vscale x 16 x i8> zeroinitializer)
+ ret <vscale x 16 x i8> %load
+}
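A note on the mangling used by the new combine: llvm.get.active.lane.mask is overloaded on both the result predicate type and the scalar counter type, which is why the replacement is built with {II.getType(), II.getOperand(0)->getType()}. The tests above only exercise i32 counters; assuming the combine also fires for the 64-bit whilelo overloads, the canonical form for an i64 counter would look like this sketch (not part of the patch):

  %mask = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 %base, i64 %n)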