[llvm] [VPlan] Support VPWidenCastRecipe in narrowToSingleScalarRecipes. (PR #141080)

Mel Chen via llvm-commits llvm-commits at lists.llvm.org
Thu May 22 07:49:33 PDT 2025


https://github.com/Mel-Chen created https://github.com/llvm/llvm-project/pull/141080

Narrow a VPWidenCastRecipe to a single scalar when the operand being cast is uniform.
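
As a rough illustration (value names are placeholders; the pattern mirrors the CHECK-line update in X86/cost-model.ll in the second patch, with VF=2): when the cast's operand is uniform, the vector cast followed by a lane extract can be replaced by a lane extract followed by a scalar cast, and the extract can use the first lane since all lanes hold the same value:

  ; before: widen the cast, then read one lane
  %v = zext <2 x i1> %c to <2 x i8>
  %s = extractelement <2 x i8> %v, i32 1

  ; after: read the (uniform) first lane, then cast the scalar
  %e = extractelement <2 x i1> %c, i32 0
  %s = zext i1 %e to i8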

Based on https://github.com/llvm/llvm-project/pull/141074

From 4703c1c88f5bed3c38b328e961b82b6798aa06aa Mon Sep 17 00:00:00 2001
From: Mel Chen <mel.chen at sifive.com>
Date: Thu, 22 May 2025 06:24:43 -0700
Subject: [PATCH 1/2] nfc, use TypeSwitch

---
 llvm/lib/Transforms/Vectorize/VPlanUtils.h | 55 +++++++++++++---------
 1 file changed, 32 insertions(+), 23 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.h b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
index 28c1a6af2570b..5aa0b0ecea9aa 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
@@ -10,6 +10,7 @@
 #define LLVM_TRANSFORMS_VECTORIZE_VPLANUTILS_H
 
 #include "VPlan.h"
+#include "llvm/ADT/TypeSwitch.h"
 
 namespace llvm {
 class ScalarEvolution;
@@ -59,29 +60,37 @@ inline bool isSingleScalar(const VPValue *VPV) {
   if (VPV->isLiveIn())
     return true;
 
-  if (auto *Rep = dyn_cast<VPReplicateRecipe>(VPV)) {
-    const VPRegionBlock *RegionOfR = Rep->getParent()->getParent();
-    // Don't consider recipes in replicate regions as uniform yet; their first
-    // lane cannot be accessed when executing the replicate region for other
-    // lanes.
-    if (RegionOfR && RegionOfR->isReplicator())
-      return false;
-    return Rep->isSingleScalar() || (PreservesUniformity(Rep->getOpcode()) &&
-                                     all_of(Rep->operands(), isSingleScalar));
-  }
-  if (isa<VPWidenGEPRecipe, VPDerivedIVRecipe, VPBlendRecipe>(VPV))
-    return all_of(VPV->getDefiningRecipe()->operands(), isSingleScalar);
-  if (auto *WidenR = dyn_cast<VPWidenRecipe>(VPV)) {
-    return PreservesUniformity(WidenR->getOpcode()) &&
-           all_of(WidenR->operands(), isSingleScalar);
-  }
-  if (auto *VPI = dyn_cast<VPInstruction>(VPV))
-    return VPI->isSingleScalar() || VPI->isVectorToScalar() ||
-           (PreservesUniformity(VPI->getOpcode()) &&
-            all_of(VPI->operands(), isSingleScalar));
-
-  // VPExpandSCEVRecipes must be placed in the entry and are alway uniform.
-  return isa<VPExpandSCEVRecipe>(VPV);
+  return TypeSwitch<const VPValue *, bool>(VPV)
+      .Case<VPReplicateRecipe>([&](const auto *Rep) {
+        const VPRegionBlock *RegionOfR = Rep->getParent()->getParent();
+        // Don't consider recipes in replicate regions as uniform yet; their
+        // first lane cannot be accessed when executing the replicate region for
+        // other lanes.
+        if (RegionOfR && RegionOfR->isReplicator())
+          return false;
+        return Rep->isSingleScalar() ||
+               (PreservesUniformity(Rep->getOpcode()) &&
+                all_of(Rep->operands(), isSingleScalar));
+      })
+      .Case<VPWidenGEPRecipe, VPDerivedIVRecipe, VPBlendRecipe>(
+          [&](const auto *R) {
+            return all_of(R->getDefiningRecipe()->operands(), isSingleScalar);
+          })
+      .Case<VPWidenRecipe>([&](const auto *WidenR) {
+        return PreservesUniformity(WidenR->getOpcode()) &&
+               all_of(WidenR->operands(), isSingleScalar);
+      })
+      .Case<VPInstruction>([&](const auto *VPI) {
+        return VPI->isSingleScalar() || VPI->isVectorToScalar() ||
+               (PreservesUniformity(VPI->getOpcode()) &&
+                all_of(VPI->operands(), isSingleScalar));
+      })
+      .Case<VPExpandSCEVRecipe>([](const VPValue *) {
+        // VPExpandSCEVRecipes must be placed in the entry and are always
+        // uniform.
+        return true;
+      })
+      .Default([](const VPValue *) { return false; });
 }
 
 /// Return true if \p V is a header mask in \p Plan.
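
For readers unfamiliar with llvm::TypeSwitch: the NFC patch above replaces the chain of dyn_cast/isa checks with a single dispatch, where each Case<> runs its callback for the first matching dynamic type and Default covers the rest (so the old trailing return of isa<VPExpandSCEVRecipe>(VPV) becomes its own Case). A minimal, self-contained sketch of the same pattern, using the llvm::Value hierarchy instead of VPlan recipes (the function and names here are illustrative only):

  #include "llvm/ADT/TypeSwitch.h"
  #include "llvm/IR/Argument.h"
  #include "llvm/IR/Constant.h"
  #include "llvm/IR/Instruction.h"
  #include "llvm/IR/Value.h"

  // Classify a Value by its dynamic type; the first matching Case<> wins.
  static const char *kindOf(const llvm::Value *V) {
    return llvm::TypeSwitch<const llvm::Value *, const char *>(V)
        .Case<llvm::Argument>([](const llvm::Argument *) { return "argument"; })
        .Case<llvm::Constant>([](const llvm::Constant *) { return "constant"; })
        .Case<llvm::Instruction>(
            [](const llvm::Instruction *) { return "instruction"; })
        .Default([](const llvm::Value *) { return "other"; });
  }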

From 722abd045c2ee771a4d6dd6c9e973746142e1c67 Mon Sep 17 00:00:00 2001
From: Mel Chen <mel.chen at sifive.com>
Date: Mon, 19 May 2025 03:34:40 -0700
Subject: [PATCH 2/2] Handle widen-cast in isSingleScalar

---
 llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp    | 2 +-
 llvm/lib/Transforms/Vectorize/VPlanUtils.h           | 2 +-
 llvm/test/Transforms/LoopVectorize/X86/cost-model.ll | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 34633cd748eb1..4655e911ab3a1 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1175,7 +1175,7 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) {
            vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry()))) {
     for (VPRecipeBase &R : make_early_inc_range(reverse(*VPBB))) {
       auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
-      if (!RepR && !isa<VPWidenRecipe>(&R))
+      if (!RepR && !isa<VPWidenRecipe, VPWidenCastRecipe>(&R))
         continue;
       if (RepR && (RepR->isSingleScalar() || RepR->isPredicated()))
         continue;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.h b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
index 5aa0b0ecea9aa..5fb87b25c22cf 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
@@ -76,7 +76,7 @@ inline bool isSingleScalar(const VPValue *VPV) {
           [&](const auto *R) {
             return all_of(R->getDefiningRecipe()->operands(), isSingleScalar);
           })
-      .Case<VPWidenRecipe>([&](const auto *WidenR) {
+      .Case<VPWidenRecipe, VPWidenCastRecipe>([&](const auto *WidenR) {
         return PreservesUniformity(WidenR->getOpcode()) &&
                all_of(WidenR->operands(), isSingleScalar);
       })
diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
index 7c42c3d9cd52e..fca0caa5cce31 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
@@ -378,8 +378,8 @@ define void @multi_exit(ptr %dst, ptr %src.1, ptr %src.2, i64 %A, i64 %B) #0 {
 ; CHECK-NEXT:    [[TMP15:%.*]] = icmp eq <2 x i64> [[BROADCAST_SPLAT]], zeroinitializer
 ; CHECK-NEXT:    [[TMP16:%.*]] = icmp ne <2 x i64> [[BROADCAST_SPLAT10]], zeroinitializer
 ; CHECK-NEXT:    [[TMP17:%.*]] = and <2 x i1> [[TMP16]], [[TMP15]]
-; CHECK-NEXT:    [[TMP18:%.*]] = zext <2 x i1> [[TMP17]] to <2 x i8>
-; CHECK-NEXT:    [[TMP19:%.*]] = extractelement <2 x i8> [[TMP18]], i32 1
+; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <2 x i1> [[TMP17]], i32 0
+; CHECK-NEXT:    [[TMP19:%.*]] = zext i1 [[TMP18]] to i8
 ; CHECK-NEXT:    store i8 [[TMP19]], ptr [[DST]], align 1, !alias.scope [[META10:![0-9]+]], !noalias [[META12:![0-9]+]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]


