[llvm] [LV] Reverse mask up front, not when creating vector pointer. (PR #72163)

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Mon Nov 13 14:06:15 PST 2023


https://github.com/fhahn created https://github.com/llvm/llvm-project/pull/72163

Reverse mask early on when populating BlockInMask. This will enable
separating mask management and address computation from the memory
recipes in the future and is also needed to enable explicit unrolling in
VPlan.


>From 035b334598b4375d4b0682a5ced3f58fcd5a2302 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Mon, 13 Nov 2023 22:06:01 +0000
Subject: [PATCH] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20initia?=
 =?UTF-8?q?l=20version?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created using spr 1.3.4
---
 llvm/lib/Transforms/Vectorize/LoopVectorize.cpp       | 11 ++++++-----
 .../LoopVectorize/AArch64/sve-vector-reverse-mask4.ll |  2 +-
 .../LoopVectorize/AArch64/vector-reverse-mask4.ll     |  8 ++++----
 .../Transforms/LoopVectorize/X86/masked_load_store.ll |  8 ++++----
 4 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index e9d0315d114f65c..ae8d306c44dd885 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9525,8 +9525,12 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
   InnerLoopVectorizer::VectorParts BlockInMaskParts(State.UF);
   bool isMaskRequired = getMask();
   if (isMaskRequired)
-    for (unsigned Part = 0; Part < State.UF; ++Part)
-      BlockInMaskParts[Part] = State.get(getMask(), Part);
+    for (unsigned Part = 0; Part < State.UF; ++Part) {
+      Value *Mask = State.get(getMask(), Part);
+      if (isReverse())
+        Mask = Builder.CreateVectorReverse(Mask, "reverse");
+      BlockInMaskParts[Part] = Mask;
+    }
 
   const auto CreateVecPtr = [&](unsigned Part, Value *Ptr) -> Value * {
     // Calculate the pointer for the specific unroll-part.
@@ -9558,9 +9562,6 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
       PartPtr = Builder.CreateGEP(ScalarDataTy, Ptr, NumElt, "", InBounds);
       PartPtr =
           Builder.CreateGEP(ScalarDataTy, PartPtr, LastLane, "", InBounds);
-      if (isMaskRequired) // Reverse of a null all-one mask is a null mask.
-        BlockInMaskParts[Part] =
-            Builder.CreateVectorReverse(BlockInMaskParts[Part], "reverse");
     } else {
       Value *Increment = createStepForVF(Builder, IndexTy, State.VF, Part);
       PartPtr = Builder.CreateGEP(ScalarDataTy, Ptr, Increment, "", InBounds);
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse-mask4.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse-mask4.ll
index 58c54103c72c6dc..70833e44b075a98 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse-mask4.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse-mask4.ll
@@ -22,8 +22,8 @@ define void @vector_reverse_mask_nxv4i1(ptr %a, ptr %cond, i64 %N) #0 {
 ; CHECK: %[[WIDEMSKLOAD:.*]] = call <vscale x 4 x double> @llvm.masked.load.nxv4f64.p0(ptr %{{.*}}, i32 8, <vscale x 4 x i1> %[[REVERSE6]], <vscale x 4 x double> poison)
 ; CHECK: %[[REVERSE7:.*]] = call <vscale x 4 x double> @llvm.experimental.vector.reverse.nxv4f64(<vscale x 4 x double> %[[WIDEMSKLOAD]])
 ; CHECK: %[[FADD:.*]] = fadd <vscale x 4 x double> %[[REVERSE7]]
+; CHECK: %[[REVERSE9:.*]] = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %{{.*}})
 ; CHECK: %[[REVERSE8:.*]] = call <vscale x 4 x double> @llvm.experimental.vector.reverse.nxv4f64(<vscale x 4 x double> %[[FADD]])
-; CHECK:  %[[REVERSE9:.*]] = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %{{.*}})
 ; CHECK: call void @llvm.masked.store.nxv4f64.p0(<vscale x 4 x double> %[[REVERSE8]], ptr %{{.*}}, i32 8, <vscale x 4 x i1> %[[REVERSE9]]
 
 entry:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll
index e8e2008912c8344..195826300e3996f 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll
@@ -43,16 +43,16 @@ define void @vector_reverse_mask_v4i1(ptr noalias %a, ptr noalias %cond, i64 %N)
 ; CHECK-NEXT:    [[TMP5:%.*]] = fcmp une <4 x double> [[REVERSE]], zeroinitializer
 ; CHECK-NEXT:    [[TMP6:%.*]] = fcmp une <4 x double> [[REVERSE2]], zeroinitializer
 ; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr double, ptr [[A:%.*]], i64 [[TMP1]]
-; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr double, ptr [[TMP7]], i64 -3
 ; CHECK-NEXT:    [[REVERSE3:%.*]] = shufflevector <4 x i1> [[TMP5]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[REVERSE4:%.*]] = shufflevector <4 x i1> [[TMP6]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr double, ptr [[TMP7]], i64 -3
 ; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP8]], i32 8, <4 x i1> [[REVERSE3]], <4 x double> poison)
 ; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr double, ptr [[TMP7]], i64 -7
-; CHECK-NEXT:    [[REVERSE5:%.*]] = shufflevector <4 x i1> [[TMP6]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[WIDE_MASKED_LOAD6:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP9]], i32 8, <4 x i1> [[REVERSE5]], <4 x double> poison)
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD6:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP9]], i32 8, <4 x i1> [[REVERSE4]], <4 x double> poison)
 ; CHECK-NEXT:    [[TMP10:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD]], <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
 ; CHECK-NEXT:    [[TMP11:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD6]], <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
 ; CHECK-NEXT:    call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP10]], ptr [[TMP8]], i32 8, <4 x i1> [[REVERSE3]])
-; CHECK-NEXT:    call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP11]], ptr [[TMP9]], i32 8, <4 x i1> [[REVERSE5]])
+; CHECK-NEXT:    call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP11]], ptr [[TMP9]], i32 8, <4 x i1> [[REVERSE4]])
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
 ; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll b/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll
index 40ac668c6e59948..d775b0e0f0199c4 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll
@@ -1391,24 +1391,24 @@ define void @foo6(ptr nocapture readonly %in, ptr nocapture %out, i32 %size, ptr
 ; AVX2-NEXT:    [[TMP21:%.*]] = getelementptr double, ptr [[IN]], i64 [[TMP1]]
 ; AVX2-NEXT:    [[TMP22:%.*]] = getelementptr double, ptr [[IN]], i64 [[TMP2]]
 ; AVX2-NEXT:    [[TMP23:%.*]] = getelementptr double, ptr [[IN]], i64 [[TMP3]]
+; AVX2-NEXT:    [[REVERSE12:%.*]] = shufflevector <4 x i1> [[TMP16]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; AVX2-NEXT:    [[REVERSE14:%.*]] = shufflevector <4 x i1> [[TMP17]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; AVX2-NEXT:    [[REVERSE17:%.*]] = shufflevector <4 x i1> [[TMP18]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; AVX2-NEXT:    [[REVERSE20:%.*]] = shufflevector <4 x i1> [[TMP19]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; AVX2-NEXT:    [[TMP24:%.*]] = getelementptr double, ptr [[TMP20]], i32 0
 ; AVX2-NEXT:    [[TMP25:%.*]] = getelementptr double, ptr [[TMP24]], i32 -3
-; AVX2-NEXT:    [[REVERSE12:%.*]] = shufflevector <4 x i1> [[TMP16]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; AVX2-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP25]], i32 8, <4 x i1> [[REVERSE12]], <4 x double> poison), !alias.scope !21
 ; AVX2-NEXT:    [[REVERSE13:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; AVX2-NEXT:    [[TMP26:%.*]] = getelementptr double, ptr [[TMP20]], i32 -4
 ; AVX2-NEXT:    [[TMP27:%.*]] = getelementptr double, ptr [[TMP26]], i32 -3
-; AVX2-NEXT:    [[REVERSE14:%.*]] = shufflevector <4 x i1> [[TMP17]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; AVX2-NEXT:    [[WIDE_MASKED_LOAD15:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP27]], i32 8, <4 x i1> [[REVERSE14]], <4 x double> poison), !alias.scope !21
 ; AVX2-NEXT:    [[REVERSE16:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD15]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; AVX2-NEXT:    [[TMP28:%.*]] = getelementptr double, ptr [[TMP20]], i32 -8
 ; AVX2-NEXT:    [[TMP29:%.*]] = getelementptr double, ptr [[TMP28]], i32 -3
-; AVX2-NEXT:    [[REVERSE17:%.*]] = shufflevector <4 x i1> [[TMP18]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; AVX2-NEXT:    [[WIDE_MASKED_LOAD18:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP29]], i32 8, <4 x i1> [[REVERSE17]], <4 x double> poison), !alias.scope !21
 ; AVX2-NEXT:    [[REVERSE19:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD18]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; AVX2-NEXT:    [[TMP30:%.*]] = getelementptr double, ptr [[TMP20]], i32 -12
 ; AVX2-NEXT:    [[TMP31:%.*]] = getelementptr double, ptr [[TMP30]], i32 -3
-; AVX2-NEXT:    [[REVERSE20:%.*]] = shufflevector <4 x i1> [[TMP19]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; AVX2-NEXT:    [[WIDE_MASKED_LOAD21:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP31]], i32 8, <4 x i1> [[REVERSE20]], <4 x double> poison), !alias.scope !21
 ; AVX2-NEXT:    [[REVERSE22:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD21]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; AVX2-NEXT:    [[TMP32:%.*]] = fadd <4 x double> [[REVERSE13]], <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>



More information about the llvm-commits mailing list