[llvm] [SLP] Fix incorrect operand info for select in getCmpSelInstrCost (PR #188506)

Wed Mar 25 08:20:16 PDT 2026

https://github.com/alexey-bataev created https://github.com/llvm/llvm-project/pull/188506

The operand info passed to getCmpSelInstrCost for Select instructions
was taken from operands 0 and 1 (the condition and the true value), but
the API expects info about the data operands (the true and false
values). For selects, the data operands are at indices 1 and 2, not 0
and 1; compares keep using operands 0 and 1. As a result, the cost model
received the condition's operand info in place of the true value's, and
the true value's in place of the false value's, potentially producing
inaccurate cost estimates.


From f871503c9f8fda1678f3d5eeb724e81fb4404077 Mon Sep 17 00:00:00 2001
From: Alexey Bataev <a.bataev at outlook.com>
Date: Wed, 25 Mar 2026 08:20:01 -0700
Subject: [PATCH] [𝘀𝗽𝗿] initial version
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created using spr 1.3.7
---
 .../Transforms/Vectorize/SLPVectorizer.cpp    | 19 ++++++---
 .../RISCV/shuffled-gather-casted.ll           | 39 ++++++++++++-------
 2 files changed, 38 insertions(+), 20 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 25ba77dca7e93..c274cdbdce4a9 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -16010,8 +16010,12 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
 
       InstructionCost ScalarCost = TTI->getCmpSelInstrCost(
           E->getOpcode(), OrigScalarTy, Builder.getInt1Ty(), CurrentPred,
-          CostKind, getOperandInfo(VI->getOperand(0)),
-          getOperandInfo(VI->getOperand(1)), VI);
+          CostKind,
+          getOperandInfo(
+              VI->getOperand(ShuffleOrOp == Instruction::Select ? 1 : 0)),
+          getOperandInfo(
+              VI->getOperand(ShuffleOrOp == Instruction::Select ? 2 : 1)),
+          VI);
       InstructionCost IntrinsicCost = GetMinMaxCost(OrigScalarTy, VI);
       if (IntrinsicCost.isValid())
         ScalarCost = IntrinsicCost;
@@ -16021,10 +16025,13 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
     auto GetVectorCost = [&](InstructionCost CommonCost) {
       auto *MaskTy = getWidenedType(Builder.getInt1Ty(), VL.size());
 
-      InstructionCost VecCost =
-          TTI->getCmpSelInstrCost(E->getOpcode(), VecTy, MaskTy, VecPred,
-                                  CostKind, getOperandInfo(E->getOperand(0)),
-                                  getOperandInfo(E->getOperand(1)), VL0);
+      InstructionCost VecCost = TTI->getCmpSelInstrCost(
+          E->getOpcode(), VecTy, MaskTy, VecPred, CostKind,
+          getOperandInfo(
+              E->getOperand(ShuffleOrOp == Instruction::Select ? 1 : 0)),
+          getOperandInfo(
+              E->getOperand(ShuffleOrOp == Instruction::Select ? 2 : 1)),
+          VL0);
       if (auto *SI = dyn_cast<SelectInst>(VL0)) {
         auto *CondType =
             getWidenedType(SI->getCondition()->getType(), VL.size());
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/shuffled-gather-casted.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/shuffled-gather-casted.ll
index 06c4bc205adf0..9701b1f9fc397 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/shuffled-gather-casted.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/shuffled-gather-casted.ll
@@ -58,22 +58,33 @@ define i32 @test1(ptr %p) {
 ; CHECK-SAME: ptr [[P:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[D_0:%.*]] = load i16, ptr [[P]], align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i16> <i16 poison, i16 0, i16 0, i16 0>, i16 [[D_0]], i32 0
+; CHECK-NEXT:    [[ZEXT_D_0:%.*]] = zext i16 [[D_0]] to i32
+; CHECK-NEXT:    [[ZERO_0:%.*]] = zext i16 0 to i32
+; CHECK-NEXT:    [[ZERO_1:%.*]] = zext i16 0 to i32
+; CHECK-NEXT:    [[ZERO_2:%.*]] = zext i16 0 to i32
+; CHECK-NEXT:    [[OR_D_0:%.*]] = or i32 [[ZEXT_D_0]], 0
+; CHECK-NEXT:    [[OR_ZERO_0:%.*]] = or i32 [[ZERO_0]], 0
+; CHECK-NEXT:    [[OR_ZERO_1:%.*]] = or i32 [[ZERO_1]], 0
+; CHECK-NEXT:    [[OR_ZERO_2:%.*]] = or i32 [[ZERO_2]], 0
 ; CHECK-NEXT:    [[SZERO_2:%.*]] = sext i16 -1 to i32
+; CHECK-NEXT:    [[SZERO_0:%.*]] = sext i16 -16383 to i32
 ; CHECK-NEXT:    [[UZERO_1:%.*]] = zext i16 -1 to i32
-; CHECK-NEXT:    [[TMP1:%.*]] = or <4 x i16> [[TMP0]], zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[UZERO_1]], i32 2
-; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[SZERO_2]], i32 3
-; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <4 x i32> [[TMP9]], <4 x i32> <i32 -1, i32 -16383, i32 undef, i32 undef>, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
-; CHECK-NEXT:    [[TMP13:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
-; CHECK-NEXT:    [[TMP6:%.*]] = and <4 x i32> [[TMP13]], [[TMP12]]
-; CHECK-NEXT:    [[TMP3:%.*]] = zext <4 x i16> [[TMP0]] to <4 x i32>
-; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP12]], <4 x i32> <i32 0, i32 5, i32 6, i32 7>
-; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq <4 x i32> [[TMP4]], <i32 65535, i32 -16383, i32 65535, i32 65535>
-; CHECK-NEXT:    [[TMP10:%.*]] = trunc <4 x i32> [[TMP6]] to <4 x i16>
-; CHECK-NEXT:    [[TMP11:%.*]] = select <4 x i1> [[TMP5]], <4 x i16> [[TMP10]], <4 x i16> <i16 4, i16 3, i16 2, i16 1>
-; CHECK-NEXT:    [[TMP7:%.*]] = zext <4 x i16> [[TMP11]] to <4 x i32>
-; CHECK-NEXT:    [[TMP8:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP7]])
+; CHECK-NEXT:    [[SZERO_3:%.*]] = sext i16 -1 to i32
+; CHECK-NEXT:    [[ZERO_D_0:%.*]] = and i32 [[OR_D_0]], [[SZERO_2]]
+; CHECK-NEXT:    [[AND_ZERO_0:%.*]] = and i32 [[OR_ZERO_0]], [[SZERO_0]]
+; CHECK-NEXT:    [[AND_ZERO_1:%.*]] = and i32 [[OR_ZERO_1]], [[UZERO_1]]
+; CHECK-NEXT:    [[AND_ZERO_2:%.*]] = and i32 [[OR_ZERO_2]], [[SZERO_3]]
+; CHECK-NEXT:    [[D_0_GT_0:%.*]] = icmp eq i32 [[ZEXT_D_0]], 65535
+; CHECK-NEXT:    [[FALSE_0:%.*]] = icmp eq i32 [[SZERO_0]], -16383
+; CHECK-NEXT:    [[FALSE_1:%.*]] = icmp eq i32 [[UZERO_1]], 65535
+; CHECK-NEXT:    [[FALSE_2:%.*]] = icmp eq i32 [[SZERO_3]], 65535
+; CHECK-NEXT:    [[SELECT_0_2:%.*]] = select i1 [[D_0_GT_0]], i32 [[ZERO_D_0]], i32 4
+; CHECK-NEXT:    [[SELECT_1_0:%.*]] = select i1 [[FALSE_0]], i32 [[AND_ZERO_0]], i32 3
+; CHECK-NEXT:    [[SELECT_2_0:%.*]] = select i1 [[FALSE_1]], i32 [[AND_ZERO_1]], i32 2
+; CHECK-NEXT:    [[SELECT_3_0:%.*]] = select i1 [[FALSE_2]], i32 [[AND_ZERO_2]], i32 1
+; CHECK-NEXT:    [[MAX_0:%.*]] = add i32 [[SELECT_0_2]], [[SELECT_1_0]]
+; CHECK-NEXT:    [[MAX_1:%.*]] = add i32 [[MAX_0]], [[SELECT_2_0]]
+; CHECK-NEXT:    [[TMP8:%.*]] = add i32 [[MAX_1]], [[SELECT_3_0]]
 ; CHECK-NEXT:    ret i32 [[TMP8]]
 ;
 entry: