[llvm] [VP] Detect truncated and shifted EVLs during expansion (PR #154334)
Luke Lau via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 19 06:39:34 PDT 2025
https://github.com/lukel97 created https://github.com/llvm/llvm-project/pull/154334
This PR aims to help support VP intrinsics on AArch64 (see https://github.com/llvm/llvm-project/pull/154327), where the EVL operand needs to be "expanded" into the mask operand because SVE has no notion of an EVL.
If the loop vectorizer emits an exact EVL that matches a scalable vector type's element count, in theory it should look like `(mul i32 vscale, n)`.
ExpandVectorPredication tries to detect this, and when it succeeds it skips folding the EVL into the mask, since the EVL covers every element anyway.
In practice, though, vscale will be computed as an i64 and truncated to i32, and because the constant factor n is a known power of 2 the multiply will be canonicalized to a shl. So the EVL will really look like `(trunc (shl i64 vscale, log2(n)) to i32)`.
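As a concrete sketch (mirroring the `<vscale x 2 x i64>` cases in the precommit test below; the value names are just for exposition), here is the form the detection previously matched versus the form it actually sees:

  ; In theory: an exact EVL computed directly in i32
  %vscale = call i32 @llvm.vscale.i32()
  %evl = mul i32 %vscale, 2

  ; In practice: computed in i64, canonicalized from mul to shl, then truncated
  %vscale.i64 = call i64 @llvm.vscale.i64()
  %shl = shl i64 %vscale.i64, 1
  %evl.trunc = trunc nuw i64 %shl to i32

This patch teaches `VPIntrinsic::canIgnoreVectorLengthParam` to look through `nuw` truncates and to recognize the shl form in addition to the mul form.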
From de961fd390894d1005574d82f6e8da0cb2835b4b Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Tue, 19 Aug 2025 21:20:42 +0800
Subject: [PATCH 1/2] Precommit tests
---
.../expand-vp-convert-evl.ll | 119 ++++++++++++++++++
1 file changed, 119 insertions(+)
create mode 100644 llvm/test/Transforms/PreISelIntrinsicLowering/expand-vp-convert-evl.ll
diff --git a/llvm/test/Transforms/PreISelIntrinsicLowering/expand-vp-convert-evl.ll b/llvm/test/Transforms/PreISelIntrinsicLowering/expand-vp-convert-evl.ll
new file mode 100644
index 0000000000000..191edcc9324c3
--- /dev/null
+++ b/llvm/test/Transforms/PreISelIntrinsicLowering/expand-vp-convert-evl.ll
@@ -0,0 +1,119 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes=pre-isel-intrinsic-lowering -expandvp-override-evl-transform=Convert -expandvp-override-mask-transform=Legal -S < %s | FileCheck %s
+
+define <vscale x 2 x i64> @unknown_evl(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y, <vscale x 2 x i1> %m, i32 %evl) {
+; CHECK-LABEL: define <vscale x 2 x i64> @unknown_evl(
+; CHECK-SAME: <vscale x 2 x i64> [[X:%.*]], <vscale x 2 x i64> [[Y:%.*]], <vscale x 2 x i1> [[M:%.*]], i32 [[EVL:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 0, i32 [[EVL]])
+; CHECK-NEXT: [[TMP2:%.*]] = and <vscale x 2 x i1> [[TMP1]], [[M]]
+; CHECK-NEXT: [[VSCALE:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[SCALABLE_SIZE:%.*]] = mul nuw i32 [[VSCALE]], 2
+; CHECK-NEXT: [[ADD:%.*]] = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i64> poison, <vscale x 2 x i1> [[TMP2]], i32 [[SCALABLE_SIZE]])
+; CHECK-NEXT: ret <vscale x 2 x i64> [[ADD]]
+;
+ %add = call <vscale x 2 x i64> @llvm.vp.add(<vscale x 2 x i64> poison, <vscale x 2 x i64> poison, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i64> %add
+}
+
+define <2 x i64> @exact_evl_fixed(<2 x i64> %x, <2 x i64> %y, <2 x i1> %m) {
+; CHECK-LABEL: define <2 x i64> @exact_evl_fixed(
+; CHECK-SAME: <2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], <2 x i1> [[M:%.*]]) {
+; CHECK-NEXT: [[ADD:%.*]] = call <2 x i64> @llvm.vp.add.v2i64(<2 x i64> poison, <2 x i64> poison, <2 x i1> [[M]], i32 2)
+; CHECK-NEXT: ret <2 x i64> [[ADD]]
+;
+ %add = call <2 x i64> @llvm.vp.add(<2 x i64> poison, <2 x i64> poison, <2 x i1> %m, i32 2)
+ ret <2 x i64> %add
+}
+
+define <vscale x 2 x i64> @exact_evl_vscale_mul(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y, <vscale x 2 x i1> %m) {
+; CHECK-LABEL: define <vscale x 2 x i64> @exact_evl_vscale_mul(
+; CHECK-SAME: <vscale x 2 x i64> [[X:%.*]], <vscale x 2 x i64> [[Y:%.*]], <vscale x 2 x i1> [[M:%.*]]) {
+; CHECK-NEXT: [[VSCALE:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[EVL:%.*]] = mul i32 [[VSCALE]], 2
+; CHECK-NEXT: [[ADD:%.*]] = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i64> poison, <vscale x 2 x i1> [[M]], i32 [[EVL]])
+; CHECK-NEXT: ret <vscale x 2 x i64> [[ADD]]
+;
+ %vscale = call i32 @llvm.vscale()
+ %evl = mul i32 %vscale, 2
+ %add = call <vscale x 2 x i64> @llvm.vp.add(<vscale x 2 x i64> poison, <vscale x 2 x i64> poison, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i64> %add
+}
+
+define <vscale x 2 x i64> @exact_evl_vscale_shl(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y, <vscale x 2 x i1> %m) {
+; CHECK-LABEL: define <vscale x 2 x i64> @exact_evl_vscale_shl(
+; CHECK-SAME: <vscale x 2 x i64> [[X:%.*]], <vscale x 2 x i64> [[Y:%.*]], <vscale x 2 x i1> [[M:%.*]]) {
+; CHECK-NEXT: [[VSCALE:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[EVL:%.*]] = shl i32 [[VSCALE]], 1
+; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 0, i32 [[EVL]])
+; CHECK-NEXT: [[TMP2:%.*]] = and <vscale x 2 x i1> [[TMP1]], [[M]]
+; CHECK-NEXT: [[VSCALE1:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[SCALABLE_SIZE:%.*]] = mul nuw i32 [[VSCALE1]], 2
+; CHECK-NEXT: [[ADD:%.*]] = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i64> poison, <vscale x 2 x i1> [[TMP2]], i32 [[SCALABLE_SIZE]])
+; CHECK-NEXT: ret <vscale x 2 x i64> [[ADD]]
+;
+ %vscale = call i32 @llvm.vscale()
+ %evl = shl i32 %vscale, 1
+ %add = call <vscale x 2 x i64> @llvm.vp.add(<vscale x 2 x i64> poison, <vscale x 2 x i64> poison, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i64> %add
+}
+
+define <vscale x 2 x i64> @exact_evl_vscale_mul_trunc(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y, <vscale x 2 x i1> %m) {
+; CHECK-LABEL: define <vscale x 2 x i64> @exact_evl_vscale_mul_trunc(
+; CHECK-SAME: <vscale x 2 x i64> [[X:%.*]], <vscale x 2 x i64> [[Y:%.*]], <vscale x 2 x i1> [[M:%.*]]) {
+; CHECK-NEXT: [[VSCALE:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[VSCALE]], 2
+; CHECK-NEXT: [[EVL:%.*]] = trunc nuw i64 [[MUL]] to i32
+; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 0, i32 [[EVL]])
+; CHECK-NEXT: [[TMP2:%.*]] = and <vscale x 2 x i1> [[TMP1]], [[M]]
+; CHECK-NEXT: [[VSCALE1:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[SCALABLE_SIZE:%.*]] = mul nuw i32 [[VSCALE1]], 2
+; CHECK-NEXT: [[ADD:%.*]] = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i64> poison, <vscale x 2 x i1> [[TMP2]], i32 [[SCALABLE_SIZE]])
+; CHECK-NEXT: ret <vscale x 2 x i64> [[ADD]]
+;
+ %vscale = call i64 @llvm.vscale()
+ %mul = mul i64 %vscale, 2
+ %evl = trunc nuw i64 %mul to i32
+ %add = call <vscale x 2 x i64> @llvm.vp.add(<vscale x 2 x i64> poison, <vscale x 2 x i64> poison, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i64> %add
+}
+
+
+define <vscale x 2 x i64> @exact_evl_vscale_shl_trunc(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y, <vscale x 2 x i1> %m) {
+; CHECK-LABEL: define <vscale x 2 x i64> @exact_evl_vscale_shl_trunc(
+; CHECK-SAME: <vscale x 2 x i64> [[X:%.*]], <vscale x 2 x i64> [[Y:%.*]], <vscale x 2 x i1> [[M:%.*]]) {
+; CHECK-NEXT: [[VSCALE:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[SHL:%.*]] = shl i64 [[VSCALE]], 1
+; CHECK-NEXT: [[EVL:%.*]] = trunc nuw i64 [[SHL]] to i32
+; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 0, i32 [[EVL]])
+; CHECK-NEXT: [[TMP2:%.*]] = and <vscale x 2 x i1> [[TMP1]], [[M]]
+; CHECK-NEXT: [[VSCALE1:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[SCALABLE_SIZE:%.*]] = mul nuw i32 [[VSCALE1]], 2
+; CHECK-NEXT: [[ADD:%.*]] = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i64> poison, <vscale x 2 x i1> [[TMP2]], i32 [[SCALABLE_SIZE]])
+; CHECK-NEXT: ret <vscale x 2 x i64> [[ADD]]
+;
+ %vscale = call i64 @llvm.vscale()
+ %shl = shl i64 %vscale, 1
+ %evl = trunc nuw i64 %shl to i32
+ %add = call <vscale x 2 x i64> @llvm.vp.add(<vscale x 2 x i64> poison, <vscale x 2 x i64> poison, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i64> %add
+}
+
+define <vscale x 2 x i64> @exact_evl_vscale_shl_trunc_no_nuw(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y, <vscale x 2 x i1> %m) {
+; CHECK-LABEL: define <vscale x 2 x i64> @exact_evl_vscale_shl_trunc_no_nuw(
+; CHECK-SAME: <vscale x 2 x i64> [[X:%.*]], <vscale x 2 x i64> [[Y:%.*]], <vscale x 2 x i1> [[M:%.*]]) {
+; CHECK-NEXT: [[VSCALE:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[SHL:%.*]] = shl i64 [[VSCALE]], 1
+; CHECK-NEXT: [[EVL:%.*]] = trunc i64 [[SHL]] to i32
+; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 0, i32 [[EVL]])
+; CHECK-NEXT: [[TMP2:%.*]] = and <vscale x 2 x i1> [[TMP1]], [[M]]
+; CHECK-NEXT: [[VSCALE1:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[SCALABLE_SIZE:%.*]] = mul nuw i32 [[VSCALE1]], 2
+; CHECK-NEXT: [[ADD:%.*]] = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i64> poison, <vscale x 2 x i1> [[TMP2]], i32 [[SCALABLE_SIZE]])
+; CHECK-NEXT: ret <vscale x 2 x i64> [[ADD]]
+;
+ %vscale = call i64 @llvm.vscale()
+ %shl = shl i64 %vscale, 1
+ %evl = trunc i64 %shl to i32
+ %add = call <vscale x 2 x i64> @llvm.vp.add(<vscale x 2 x i64> poison, <vscale x 2 x i64> poison, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i64> %add
+}
From b13a35aa44d5cc4ea8c148ca523d3b7f553d1c4e Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Tue, 19 Aug 2025 21:23:35 +0800
Subject: [PATCH 2/2] [VP] Detect truncated and shifted EVLs during expansion
---
llvm/include/llvm/IR/PatternMatch.h | 8 ++++++++
llvm/lib/IR/IntrinsicInst.cpp | 7 ++++++-
.../expand-vp-convert-evl.ll | 18 +++---------------
3 files changed, 17 insertions(+), 16 deletions(-)
diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h
index 2ab652ca258c6..ec4dc787747ca 100644
--- a/llvm/include/llvm/IR/PatternMatch.h
+++ b/llvm/include/llvm/IR/PatternMatch.h
@@ -2193,6 +2193,14 @@ m_TruncOrSelf(const OpTy &Op) {
return m_CombineOr(m_Trunc(Op), Op);
}
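+/// Matches trunc nuw or self.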
+template <typename OpTy>
+inline match_combine_or<NoWrapTrunc_match<OpTy, TruncInst::NoUnsignedWrap>,
+ OpTy>
+m_NUWTruncOrSelf(const OpTy &Op) {
+ return m_CombineOr(m_NUWTrunc(Op), Op);
+}
+
/// Matches SExt.
template <typename OpTy>
inline CastInst_match<OpTy, SExtInst> m_SExt(const OpTy &Op) {
diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp
index 23a4d1b5c615e..22c1479174e2c 100644
--- a/llvm/lib/IR/IntrinsicInst.cpp
+++ b/llvm/lib/IR/IntrinsicInst.cpp
@@ -613,8 +613,13 @@ bool VPIntrinsic::canIgnoreVectorLengthParam() const {
if (EC.isScalable()) {
// Compare vscale patterns
uint64_t VScaleFactor;
- if (match(VLParam, m_Mul(m_VScale(), m_ConstantInt(VScaleFactor))))
+ if (match(VLParam,
+ m_NUWTruncOrSelf(m_Mul(m_VScale(), m_ConstantInt(VScaleFactor)))))
return VScaleFactor >= EC.getKnownMinValue();
+ if (match(VLParam,
+ m_NUWTruncOrSelf(m_Shl(m_VScale(), m_ConstantInt(VScaleFactor)))))
+      return VScaleFactor < 64 &&
+             (uint64_t(1) << VScaleFactor) >= EC.getKnownMinValue();
return (EC.getKnownMinValue() == 1) && match(VLParam, m_VScale());
}
diff --git a/llvm/test/Transforms/PreISelIntrinsicLowering/expand-vp-convert-evl.ll b/llvm/test/Transforms/PreISelIntrinsicLowering/expand-vp-convert-evl.ll
index 191edcc9324c3..8dd8ae153ae58 100644
--- a/llvm/test/Transforms/PreISelIntrinsicLowering/expand-vp-convert-evl.ll
+++ b/llvm/test/Transforms/PreISelIntrinsicLowering/expand-vp-convert-evl.ll
@@ -44,11 +44,7 @@ define <vscale x 2 x i64> @exact_evl_vscale_shl(<vscale x 2 x i64> %x, <vscale x
; CHECK-SAME: <vscale x 2 x i64> [[X:%.*]], <vscale x 2 x i64> [[Y:%.*]], <vscale x 2 x i1> [[M:%.*]]) {
; CHECK-NEXT: [[VSCALE:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[EVL:%.*]] = shl i32 [[VSCALE]], 1
-; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 0, i32 [[EVL]])
-; CHECK-NEXT: [[TMP2:%.*]] = and <vscale x 2 x i1> [[TMP1]], [[M]]
-; CHECK-NEXT: [[VSCALE1:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT: [[SCALABLE_SIZE:%.*]] = mul nuw i32 [[VSCALE1]], 2
-; CHECK-NEXT: [[ADD:%.*]] = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i64> poison, <vscale x 2 x i1> [[TMP2]], i32 [[SCALABLE_SIZE]])
+; CHECK-NEXT: [[ADD:%.*]] = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i64> poison, <vscale x 2 x i1> [[M]], i32 [[EVL]])
; CHECK-NEXT: ret <vscale x 2 x i64> [[ADD]]
;
%vscale = call i32 @llvm.vscale()
@@ -63,11 +59,7 @@ define <vscale x 2 x i64> @exact_evl_vscale_mul_trunc(<vscale x 2 x i64> %x, <vs
; CHECK-NEXT: [[VSCALE:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[VSCALE]], 2
; CHECK-NEXT: [[EVL:%.*]] = trunc nuw i64 [[MUL]] to i32
-; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 0, i32 [[EVL]])
-; CHECK-NEXT: [[TMP2:%.*]] = and <vscale x 2 x i1> [[TMP1]], [[M]]
-; CHECK-NEXT: [[VSCALE1:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT: [[SCALABLE_SIZE:%.*]] = mul nuw i32 [[VSCALE1]], 2
-; CHECK-NEXT: [[ADD:%.*]] = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i64> poison, <vscale x 2 x i1> [[TMP2]], i32 [[SCALABLE_SIZE]])
+; CHECK-NEXT: [[ADD:%.*]] = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i64> poison, <vscale x 2 x i1> [[M]], i32 [[EVL]])
; CHECK-NEXT: ret <vscale x 2 x i64> [[ADD]]
;
%vscale = call i64 @llvm.vscale()
@@ -84,11 +76,7 @@ define <vscale x 2 x i64> @exact_evl_vscale_shl_trunc(<vscale x 2 x i64> %x, <vs
; CHECK-NEXT: [[VSCALE:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[SHL:%.*]] = shl i64 [[VSCALE]], 1
; CHECK-NEXT: [[EVL:%.*]] = trunc nuw i64 [[SHL]] to i32
-; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 0, i32 [[EVL]])
-; CHECK-NEXT: [[TMP2:%.*]] = and <vscale x 2 x i1> [[TMP1]], [[M]]
-; CHECK-NEXT: [[VSCALE1:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT: [[SCALABLE_SIZE:%.*]] = mul nuw i32 [[VSCALE1]], 2
-; CHECK-NEXT: [[ADD:%.*]] = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i64> poison, <vscale x 2 x i1> [[TMP2]], i32 [[SCALABLE_SIZE]])
+; CHECK-NEXT: [[ADD:%.*]] = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i64> poison, <vscale x 2 x i1> [[M]], i32 [[EVL]])
; CHECK-NEXT: ret <vscale x 2 x i64> [[ADD]]
;
%vscale = call i64 @llvm.vscale()