[llvm] 2e57761 - [VPlan] Simplify select !c, x, y -> select c, y, x (#147268)

Tue Jul 8 00:56:12 PDT 2025

Author: Luke Lau
Date: 2025-07-08T15:56:04+08:00
New Revision: 2e5776130b1da8d2d553b62a87c6bf6ace07e2f8

URL: https://github.com/llvm/llvm-project/commit/2e5776130b1da8d2d553b62a87c6bf6ace07e2f8
DIFF: https://github.com/llvm/llvm-project/commit/2e5776130b1da8d2d553b62a87c6bf6ace07e2f8.diff

LOG: [VPlan] Simplify select !c, x, y -> select c, y, x (#147268)

This is split off from #133993

On its own this simplification isn't that useful, but it allows us to
make the equivalent VPBlendRecipe optimisation more generic by operating
on VPInstructions.

In order to actually test this without #133993, I've also had to extend
the m_Not pattern matcher to catch VPWidenRecipes that compute an xor
with all-ones, since I couldn't think of a straightforward way to create
a VPInstruction::Select with a negated condition.

Added: 
    llvm/test/Transforms/LoopVectorize/select-neg-cond.ll

Modified: 
    llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
    llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index efea99f22d086..d133610ef4f75 100644

--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -366,12 +366,6 @@ m_Freeze(const Op0_t &Op0) {
   return m_VPInstruction<Instruction::Freeze>(Op0);
 }
 
-template <typename Op0_t>
-inline UnaryVPInstruction_match<Op0_t, VPInstruction::Not>
-m_Not(const Op0_t &Op0) {
-  return m_VPInstruction<VPInstruction::Not>(Op0);
-}
-
 template <typename Op0_t>
 inline UnaryVPInstruction_match<Op0_t, VPInstruction::BranchOnCond>
 m_BranchOnCond(const Op0_t &Op0) {
@@ -491,6 +485,15 @@ m_Select(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2) {
       {Op0, Op1, Op2});
 }
 
+template <typename Op0_t>
+inline match_combine_or<UnaryVPInstruction_match<Op0_t, VPInstruction::Not>,
+                        AllBinaryRecipe_match<int_pred_ty<is_all_ones>, Op0_t,
+                                              Instruction::Xor, true>>
+m_Not(const Op0_t &Op0) {
+  return m_CombineOr(m_VPInstruction<VPInstruction::Not>(Op0),
+                     m_c_Binary<Instruction::Xor>(m_AllOnes(), Op0));
+}
+
 template <typename Op0_t, typename Op1_t>
 inline match_combine_or<
     BinaryVPInstruction_match<Op0_t, Op1_t, VPInstruction::LogicalAnd>,

diff  --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 90137b72c83fb..701a6e455d09c 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1082,6 +1082,15 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
   if (match(Def, m_Select(m_VPValue(), m_VPValue(X), m_Deferred(X))))
     return Def->replaceAllUsesWith(X);
 
+  // select !c, x, y -> select c, y, x
+  VPValue *C;
+  if (match(Def, m_Select(m_Not(m_VPValue(C)), m_VPValue(X), m_VPValue(Y)))) {
+    Def->setOperand(0, C);
+    Def->setOperand(1, Y);
+    Def->setOperand(2, X);
+    return;
+  }
+
   if (match(Def, m_c_Mul(m_VPValue(A), m_SpecificInt(1))))
     return Def->replaceAllUsesWith(A);
 

diff  --git a/llvm/test/Transforms/LoopVectorize/select-neg-cond.ll b/llvm/test/Transforms/LoopVectorize/select-neg-cond.ll
new file mode 100644
index 0000000000000..def239eed33bc
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/select-neg-cond.ll
@@ -0,0 +1,62 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
+; RUN: opt -p loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S < %s | FileCheck %s
+
+define void @neg_cond(ptr noalias %p, ptr noalias %q) {
+; CHECK-LABEL: define void @neg_cond(
+; CHECK-SAME: ptr noalias [[P:%.*]], ptr noalias [[Q:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i32, ptr [[P]], i32 [[INDEX]]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i32, ptr [[TMP0]], i32 0
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <4 x i32> [[WIDE_LOAD]], splat (i32 42)
+; CHECK-NEXT:    [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> splat (i32 42), <4 x i32> splat (i32 43)
+; CHECK-NEXT:    store <4 x i32> [[TMP4]], ptr [[TMP1]], align 4
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024
+; CHECK-NEXT:    br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK:       [[SCALAR_PH]]:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ 1024, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[P_GEP:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]]
+; CHECK-NEXT:    [[X:%.*]] = load i32, ptr [[P_GEP]], align 4
+; CHECK-NEXT:    [[Q_GEP:%.*]] = getelementptr i32, ptr [[Q]], i32 [[IV]]
+; CHECK-NEXT:    [[Y:%.*]] = load i32, ptr [[Q_GEP]], align 4
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[X]], 42
+; CHECK-NEXT:    [[NOT:%.*]] = xor i1 [[CMP]], true
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[NOT]], i32 42, i32 43
+; CHECK-NEXT:    store i32 [[SEL]], ptr [[P_GEP]], align 4
+; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT:    [[DONE:%.*]] = icmp eq i32 [[IV_NEXT]], 1024
+; CHECK-NEXT:    br i1 [[DONE]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [0, %entry], [%iv.next, %loop]
+  %p.gep = getelementptr i32, ptr %p, i32 %iv
+  %x = load i32, ptr %p.gep
+  %q.gep = getelementptr i32, ptr %q, i32 %iv
+  %y = load i32, ptr %q.gep
+  %cmp = icmp eq i32 %x, 42
+  %not = xor i1 %cmp, 1
+  %sel = select i1 %not, i32 42, i32 43
+  store i32 %sel, ptr %p.gep
+  %iv.next = add i32 %iv, 1
+  %done = icmp eq i32 %iv.next, 1024
+  br i1 %done, label %exit, label %loop
+
+exit:
+  ret void
+}