[llvm] [SLP] Fix incorrect operand info for select in getCmpSelInstrCost (PR #188506)
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 25 08:20:16 PDT 2026
https://github.com/alexey-bataev created https://github.com/llvm/llvm-project/pull/188506
The operand info passed to getCmpSelInstrCost for Select instructions
was taken from operands 0 and 1 (the condition and the true value), but
the API expects info about the data operands (the true and false
values). For selects, the data operands are at indices 1 and 2, not 0
and 1. As a result, the cost model received the condition's operand info
in place of the true value's, and the true value's in place of the false
value's, potentially producing inaccurate cost estimates.
From f871503c9f8fda1678f3d5eeb724e81fb4404077 Mon Sep 17 00:00:00 2001
From: Alexey Bataev <a.bataev at outlook.com>
Date: Wed, 25 Mar 2026 08:20:01 -0700
Subject: [PATCH] [𝘀𝗽𝗿] initial version
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Created using spr 1.3.7
---
.../Transforms/Vectorize/SLPVectorizer.cpp | 19 ++++++---
.../RISCV/shuffled-gather-casted.ll | 39 ++++++++++++-------
2 files changed, 38 insertions(+), 20 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 25ba77dca7e93..c274cdbdce4a9 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -16010,8 +16010,12 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
InstructionCost ScalarCost = TTI->getCmpSelInstrCost(
E->getOpcode(), OrigScalarTy, Builder.getInt1Ty(), CurrentPred,
- CostKind, getOperandInfo(VI->getOperand(0)),
- getOperandInfo(VI->getOperand(1)), VI);
+ CostKind,
+ getOperandInfo(
+ VI->getOperand(ShuffleOrOp == Instruction::Select ? 1 : 0)),
+ getOperandInfo(
+ VI->getOperand(ShuffleOrOp == Instruction::Select ? 2 : 1)),
+ VI);
InstructionCost IntrinsicCost = GetMinMaxCost(OrigScalarTy, VI);
if (IntrinsicCost.isValid())
ScalarCost = IntrinsicCost;
@@ -16021,10 +16025,13 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
auto GetVectorCost = [&](InstructionCost CommonCost) {
auto *MaskTy = getWidenedType(Builder.getInt1Ty(), VL.size());
- InstructionCost VecCost =
- TTI->getCmpSelInstrCost(E->getOpcode(), VecTy, MaskTy, VecPred,
- CostKind, getOperandInfo(E->getOperand(0)),
- getOperandInfo(E->getOperand(1)), VL0);
+ InstructionCost VecCost = TTI->getCmpSelInstrCost(
+ E->getOpcode(), VecTy, MaskTy, VecPred, CostKind,
+ getOperandInfo(
+ E->getOperand(ShuffleOrOp == Instruction::Select ? 1 : 0)),
+ getOperandInfo(
+ E->getOperand(ShuffleOrOp == Instruction::Select ? 2 : 1)),
+ VL0);
if (auto *SI = dyn_cast<SelectInst>(VL0)) {
auto *CondType =
getWidenedType(SI->getCondition()->getType(), VL.size());
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/shuffled-gather-casted.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/shuffled-gather-casted.ll
index 06c4bc205adf0..9701b1f9fc397 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/shuffled-gather-casted.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/shuffled-gather-casted.ll
@@ -58,22 +58,33 @@ define i32 @test1(ptr %p) {
; CHECK-SAME: ptr [[P:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[D_0:%.*]] = load i16, ptr [[P]], align 4
-; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> <i16 poison, i16 0, i16 0, i16 0>, i16 [[D_0]], i32 0
+; CHECK-NEXT: [[ZEXT_D_0:%.*]] = zext i16 [[D_0]] to i32
+; CHECK-NEXT: [[ZERO_0:%.*]] = zext i16 0 to i32
+; CHECK-NEXT: [[ZERO_1:%.*]] = zext i16 0 to i32
+; CHECK-NEXT: [[ZERO_2:%.*]] = zext i16 0 to i32
+; CHECK-NEXT: [[OR_D_0:%.*]] = or i32 [[ZEXT_D_0]], 0
+; CHECK-NEXT: [[OR_ZERO_0:%.*]] = or i32 [[ZERO_0]], 0
+; CHECK-NEXT: [[OR_ZERO_1:%.*]] = or i32 [[ZERO_1]], 0
+; CHECK-NEXT: [[OR_ZERO_2:%.*]] = or i32 [[ZERO_2]], 0
; CHECK-NEXT: [[SZERO_2:%.*]] = sext i16 -1 to i32
+; CHECK-NEXT: [[SZERO_0:%.*]] = sext i16 -16383 to i32
; CHECK-NEXT: [[UZERO_1:%.*]] = zext i16 -1 to i32
-; CHECK-NEXT: [[TMP1:%.*]] = or <4 x i16> [[TMP0]], zeroinitializer
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[UZERO_1]], i32 2
-; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[SZERO_2]], i32 3
-; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i32> [[TMP9]], <4 x i32> <i32 -1, i32 -16383, i32 undef, i32 undef>, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
-; CHECK-NEXT: [[TMP13:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
-; CHECK-NEXT: [[TMP6:%.*]] = and <4 x i32> [[TMP13]], [[TMP12]]
-; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP0]] to <4 x i32>
-; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP12]], <4 x i32> <i32 0, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <4 x i32> [[TMP4]], <i32 65535, i32 -16383, i32 65535, i32 65535>
-; CHECK-NEXT: [[TMP10:%.*]] = trunc <4 x i32> [[TMP6]] to <4 x i16>
-; CHECK-NEXT: [[TMP11:%.*]] = select <4 x i1> [[TMP5]], <4 x i16> [[TMP10]], <4 x i16> <i16 4, i16 3, i16 2, i16 1>
-; CHECK-NEXT: [[TMP7:%.*]] = zext <4 x i16> [[TMP11]] to <4 x i32>
-; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP7]])
+; CHECK-NEXT: [[SZERO_3:%.*]] = sext i16 -1 to i32
+; CHECK-NEXT: [[ZERO_D_0:%.*]] = and i32 [[OR_D_0]], [[SZERO_2]]
+; CHECK-NEXT: [[AND_ZERO_0:%.*]] = and i32 [[OR_ZERO_0]], [[SZERO_0]]
+; CHECK-NEXT: [[AND_ZERO_1:%.*]] = and i32 [[OR_ZERO_1]], [[UZERO_1]]
+; CHECK-NEXT: [[AND_ZERO_2:%.*]] = and i32 [[OR_ZERO_2]], [[SZERO_3]]
+; CHECK-NEXT: [[D_0_GT_0:%.*]] = icmp eq i32 [[ZEXT_D_0]], 65535
+; CHECK-NEXT: [[FALSE_0:%.*]] = icmp eq i32 [[SZERO_0]], -16383
+; CHECK-NEXT: [[FALSE_1:%.*]] = icmp eq i32 [[UZERO_1]], 65535
+; CHECK-NEXT: [[FALSE_2:%.*]] = icmp eq i32 [[SZERO_3]], 65535
+; CHECK-NEXT: [[SELECT_0_2:%.*]] = select i1 [[D_0_GT_0]], i32 [[ZERO_D_0]], i32 4
+; CHECK-NEXT: [[SELECT_1_0:%.*]] = select i1 [[FALSE_0]], i32 [[AND_ZERO_0]], i32 3
+; CHECK-NEXT: [[SELECT_2_0:%.*]] = select i1 [[FALSE_1]], i32 [[AND_ZERO_1]], i32 2
+; CHECK-NEXT: [[SELECT_3_0:%.*]] = select i1 [[FALSE_2]], i32 [[AND_ZERO_2]], i32 1
+; CHECK-NEXT: [[MAX_0:%.*]] = add i32 [[SELECT_0_2]], [[SELECT_1_0]]
+; CHECK-NEXT: [[MAX_1:%.*]] = add i32 [[MAX_0]], [[SELECT_2_0]]
+; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[MAX_1]], [[SELECT_3_0]]
; CHECK-NEXT: ret i32 [[TMP8]]
;
entry:
More information about the llvm-commits mailing list