[llvm-branch-commits] [llvm] [LV] Add test showing lack of gather/scatter can prevent if-convert (PR #195042)

Tue May 5 01:31:56 PDT 2026

https://github.com/gbossu updated https://github.com/llvm/llvm-project/pull/195042

>From 767d29342a92feebc6e9a3e9e626cd751fa60b49 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20Bossu?= <gaetan.bossu at arm.com>
Date: Wed, 29 Apr 2026 15:45:20 +0000
Subject: [PATCH] [LV] Add test showing lack of gather/scatter can prevent
 if-convert

This introduces a new force-target-supports-gather-scatter-ops CLI
option for testing, as well as a new isLegalMaskedLoadOrStore() helper.
---
 .../Vectorize/LoopVectorizationPlanner.cpp    |   8 +-
 .../LoopVectorize/if-conversion-scalable.ll   | 120 ++++++++++++++++++
 2 files changed, 127 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/Transforms/LoopVectorize/if-conversion-scalable.ll

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.cpp
index 2fbf6dc55d2d6..a47e5807933ed 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.cpp
@@ -59,6 +59,11 @@ static cl::opt<bool> ForceTargetSupportsMaskedMemoryOps(
     cl::desc("Assume the target supports masked memory operations (used for "
              "testing)."));
 
+static cl::opt<bool> ForceTargetSupportsGatherScatterOps(
+    "force-target-supports-gather-scatter-ops", cl::init(false), cl::Hidden,
+    cl::desc("Assume the target supports gather/scatter operations (used for "
+             "testing)."));
+
 bool VFSelectionContext::isLegalMaskedLoadOrStore(Instruction *I,
                                                   ElementCount VF) const {
   assert(isa<LoadInst>(I) || isa<StoreInst>(I));
@@ -81,7 +86,8 @@ bool VFSelectionContext::isLegalGatherOrScatter(Value *V,
   Align Align = getLoadStoreAlignment(V);
   if (VF.isVector())
     Ty = VectorType::get(Ty, VF);
-  return (LI && TTI.isLegalMaskedGather(Ty, Align)) ||
+  return ForceTargetSupportsGatherScatterOps ||
+         (LI && TTI.isLegalMaskedGather(Ty, Align)) ||
          (SI && TTI.isLegalMaskedScatter(Ty, Align));
 }
 
diff --git a/llvm/test/Transforms/LoopVectorize/if-conversion-scalable.ll b/llvm/test/Transforms/LoopVectorize/if-conversion-scalable.ll
new file mode 100644
index 0000000000000..1e0d7eafc9096
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/if-conversion-scalable.ll
@@ -0,0 +1,120 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter-out-after "^scalar.ph:"
+
+; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=8  -scalable-vectorization=on \
+; RUN:     -force-target-supports-masked-memory-ops -force-target-supports-scalable-vectors \
+; RUN:     -enable-interleaved-mem-accesses -enable-masked-interleaved-mem-accesses -force-target-supports-gather-scatter-ops \
+; RUN:     -S < %s | FileCheck %s --check-prefix=CHECK-GATHER-ENABLED
+; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=8  -scalable-vectorization=on \
+; RUN:     -force-target-supports-masked-memory-ops -force-target-supports-scalable-vectors \
+; RUN:     -enable-interleaved-mem-accesses -enable-masked-interleaved-mem-accesses \
+; RUN:     -S < %s | FileCheck %s --check-prefix=CHECK-GATHER-DISABLED
+
+; Test whether the legality of gather/scatter has an impact on if-conversion
+; for scalable VFs. In either case, the example below should be vectorized with
+; VF = vscale x 8 and a masked deinterleaved load.
+
+define void @conv_interleaved_loads_load(ptr noalias nocapture noundef writeonly %a, ptr nocapture noundef readonly %b, ptr nocapture noundef readonly %c, ptr nocapture noundef readonly %flags, i64 %cond) {
+; CHECK-GATHER-ENABLED-LABEL: @conv_interleaved_loads_load(
+; CHECK-GATHER-ENABLED-NEXT:  entry:
+; CHECK-GATHER-ENABLED-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-GATHER-ENABLED-NEXT:    [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
+; CHECK-GATHER-ENABLED-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
+; CHECK-GATHER-ENABLED-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK-GATHER-ENABLED:       vector.ph:
+; CHECK-GATHER-ENABLED-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-GATHER-ENABLED-NEXT:    [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 3
+; CHECK-GATHER-ENABLED-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
+; CHECK-GATHER-ENABLED-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
+; CHECK-GATHER-ENABLED-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[COND:%.*]], i64 0
+; CHECK-GATHER-ENABLED-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+; CHECK-GATHER-ENABLED-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK-GATHER-ENABLED:       vector.body:
+; CHECK-GATHER-ENABLED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-GATHER-ENABLED-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[B:%.*]], i64 [[INDEX]]
+; CHECK-GATHER-ENABLED-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 8 x i16>, ptr [[TMP4]], align 2
+; CHECK-GATHER-ENABLED-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[FLAGS:%.*]], i64 [[INDEX]]
+; CHECK-GATHER-ENABLED-NEXT:    [[WIDE_LOAD1:%.*]] = load <vscale x 8 x i64>, ptr [[TMP5]], align 8
+; CHECK-GATHER-ENABLED-NEXT:    [[TMP6:%.*]] = icmp ne <vscale x 8 x i64> [[WIDE_LOAD1]], [[BROADCAST_SPLAT]]
+; CHECK-GATHER-ENABLED-NEXT:    [[TMP7:%.*]] = getelementptr [2 x i16], ptr [[C:%.*]], i64 [[INDEX]], i64 0
+; CHECK-GATHER-ENABLED-NEXT:    [[INTERLEAVED_MASK:%.*]] = call <vscale x 16 x i1> @llvm.vector.interleave2.nxv16i1(<vscale x 8 x i1> [[TMP6]], <vscale x 8 x i1> [[TMP6]])
+; CHECK-GATHER-ENABLED-NEXT:    [[WIDE_MASKED_VEC:%.*]] = call <vscale x 16 x i16> @llvm.masked.load.nxv16i16.p0(ptr align 2 [[TMP7]], <vscale x 16 x i1> [[INTERLEAVED_MASK]], <vscale x 16 x i16> poison)
+; CHECK-GATHER-ENABLED-NEXT:    [[STRIDED_VEC:%.*]] = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.vector.deinterleave2.nxv16i16(<vscale x 16 x i16> [[WIDE_MASKED_VEC]])
+; CHECK-GATHER-ENABLED-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[STRIDED_VEC]], 0
+; CHECK-GATHER-ENABLED-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[STRIDED_VEC]], 1
+; CHECK-GATHER-ENABLED-NEXT:    [[TMP10:%.*]] = add <vscale x 8 x i16> [[TMP8]], [[TMP9]]
+; CHECK-GATHER-ENABLED-NEXT:    [[PREDPHI:%.*]] = select <vscale x 8 x i1> [[TMP6]], <vscale x 8 x i16> [[TMP10]], <vscale x 8 x i16> zeroinitializer
+; CHECK-GATHER-ENABLED-NEXT:    [[TMP11:%.*]] = add <vscale x 8 x i16> [[WIDE_LOAD]], [[PREDPHI]]
+; CHECK-GATHER-ENABLED-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i16, ptr [[A:%.*]], i64 [[INDEX]]
+; CHECK-GATHER-ENABLED-NEXT:    store <vscale x 8 x i16> [[TMP11]], ptr [[TMP12]], align 2
+; CHECK-GATHER-ENABLED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
+; CHECK-GATHER-ENABLED-NEXT:    [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-GATHER-ENABLED-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-GATHER-ENABLED:       middle.block:
+; CHECK-GATHER-ENABLED-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
+; CHECK-GATHER-ENABLED-NEXT:    br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
+; CHECK-GATHER-ENABLED:       scalar.ph:
+;
+; CHECK-GATHER-DISABLED-LABEL: @conv_interleaved_loads_load(
+; CHECK-GATHER-DISABLED-NEXT:  entry:
+; CHECK-GATHER-DISABLED-NEXT:    br label [[VECTOR_PH:%.*]]
+; CHECK-GATHER-DISABLED:       vector.ph:
+; CHECK-GATHER-DISABLED-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[COND:%.*]], i64 0
+; CHECK-GATHER-DISABLED-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
+; CHECK-GATHER-DISABLED-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK-GATHER-DISABLED:       vector.body:
+; CHECK-GATHER-DISABLED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-GATHER-DISABLED-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i16, ptr [[B:%.*]], i64 [[INDEX]]
+; CHECK-GATHER-DISABLED-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[TMP0]], align 2
+; CHECK-GATHER-DISABLED-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[FLAGS:%.*]], i64 [[INDEX]]
+; CHECK-GATHER-DISABLED-NEXT:    [[WIDE_LOAD1:%.*]] = load <8 x i64>, ptr [[TMP1]], align 8
+; CHECK-GATHER-DISABLED-NEXT:    [[TMP2:%.*]] = icmp ne <8 x i64> [[WIDE_LOAD1]], [[BROADCAST_SPLAT]]
+; CHECK-GATHER-DISABLED-NEXT:    [[TMP3:%.*]] = getelementptr [2 x i16], ptr [[C:%.*]], i64 [[INDEX]], i64 0
+; CHECK-GATHER-DISABLED-NEXT:    [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> poison, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
+; CHECK-GATHER-DISABLED-NEXT:    [[WIDE_MASKED_VEC:%.*]] = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr align 2 [[TMP3]], <16 x i1> [[INTERLEAVED_MASK]], <16 x i16> poison)
+; CHECK-GATHER-DISABLED-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <16 x i16> [[WIDE_MASKED_VEC]], <16 x i16> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+; CHECK-GATHER-DISABLED-NEXT:    [[STRIDED_VEC2:%.*]] = shufflevector <16 x i16> [[WIDE_MASKED_VEC]], <16 x i16> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+; CHECK-GATHER-DISABLED-NEXT:    [[TMP4:%.*]] = add <8 x i16> [[STRIDED_VEC]], [[STRIDED_VEC2]]
+; CHECK-GATHER-DISABLED-NEXT:    [[PREDPHI:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP4]], <8 x i16> zeroinitializer
+; CHECK-GATHER-DISABLED-NEXT:    [[TMP5:%.*]] = add <8 x i16> [[WIDE_LOAD]], [[PREDPHI]]
+; CHECK-GATHER-DISABLED-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[A:%.*]], i64 [[INDEX]]
+; CHECK-GATHER-DISABLED-NEXT:    store <8 x i16> [[TMP5]], ptr [[TMP6]], align 2
+; CHECK-GATHER-DISABLED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; CHECK-GATHER-DISABLED-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
+; CHECK-GATHER-DISABLED-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-GATHER-DISABLED:       middle.block:
+; CHECK-GATHER-DISABLED-NEXT:    br label [[FOR_COND_CLEANUP:%.*]]
+; CHECK-GATHER-DISABLED:       for.cond.cleanup:
+; CHECK-GATHER-DISABLED-NEXT:    ret void
+;
+entry:
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body
+  ret void
+
+for.body:                                         ; preds = %entry, %for.inc
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
+  %arrayidx0 = getelementptr inbounds i16, ptr %b, i64 %indvars.iv
+  %0 = load i16, ptr %arrayidx0, align 2
+  %flagidx = getelementptr inbounds i64, ptr %flags, i64 %indvars.iv
+  %scl.val = load i64, ptr %flagidx, align 8
+  %do.access = icmp ne i64 %scl.val, %cond
+  br i1 %do.access, label %if.then, label %for.inc
+
+if.then:
+  %arrayidx1 = getelementptr inbounds [2 x i16], ptr %c, i64 %indvars.iv, i64 0
+  %pred.1 = load i16, ptr %arrayidx1, align 2
+  %arrayidx2 = getelementptr inbounds [2 x i16], ptr %c, i64 %indvars.iv, i64 1
+  %pred.2 = load i16, ptr %arrayidx2, align 2
+  %pred.sum = add i16 %pred.1, %pred.2
+  br label %for.inc
+
+for.inc:
+  %1 = phi i16 [ zeroinitializer, %for.body ], [ %pred.sum, %if.then ]
+  %result = add i16 %0, %1
+  %arrayidx3 = getelementptr inbounds i16, ptr %a, i64 %indvars.iv
+  store i16 %result, ptr %arrayidx3, align 2
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, 1024
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}