[llvm] [VectorCombine][X86] foldShuffleOfIntrinsics - provide the arguments to a getShuffleCost call (PR #170465)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 3 03:46:13 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-vectorizers
Author: Simon Pilgrim (RKSimon)
<details>
<summary>Changes</summary>
Ensure the arguments are passed to the getShuffleCost calls to improve cost analysis; in particular, if the arguments are constant, the shuffle costs will be recognised as free.
Noticed while reviewing #170052
---
Full diff: https://github.com/llvm/llvm-project/pull/170465.diff
2 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/VectorCombine.cpp (+3-2)
- (modified) llvm/test/Transforms/VectorCombine/X86/shuffle-of-fma-const.ll (+17-18)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index f1890e4f5fb95..e34a70c54ee4a 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -2924,8 +2924,9 @@ bool VectorCombine::foldShuffleOfIntrinsics(Instruction &I) {
auto *ArgTy = FixedVectorType::get(VecTy->getElementType(),
ShuffleDstTy->getNumElements());
NewArgsTy.push_back(ArgTy);
- NewCost += TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc,
- ArgTy, VecTy, OldMask, CostKind);
+ NewCost += TTI.getShuffleCost(
+ TargetTransformInfo::SK_PermuteTwoSrc, ArgTy, VecTy, OldMask,
+ CostKind, 0, nullptr, {II0->getArgOperand(I), II1->getArgOperand(I)});
}
}
IntrinsicCostAttributes NewAttr(IID, ShuffleDstTy, NewArgsTy);
diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-fma-const.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-fma-const.ll
index 4710bdee11472..b05f851a846f4 100644
--- a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-fma-const.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-fma-const.ll
@@ -15,18 +15,11 @@ define <4 x float> @shuffle_fma_const_chain(<4 x float> %a0) {
}
define <8 x float> @concat_fma_const_chain(<4 x float> %a0, <4 x float> %a1) {
-; SSE-LABEL: define <8 x float> @concat_fma_const_chain(
-; SSE-SAME: <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) #[[ATTR0]] {
-; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A0]], <4 x float> [[A1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; SSE-NEXT: [[RES:%.*]] = call <8 x float> @llvm.fma.v8f32(<8 x float> [[TMP1]], <8 x float> splat (float 0x3F8DE8D040000000), <8 x float> splat (float 0xBFB3715EE0000000))
-; SSE-NEXT: ret <8 x float> [[RES]]
-;
-; AVX-LABEL: define <8 x float> @concat_fma_const_chain(
-; AVX-SAME: <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) #[[ATTR0]] {
-; AVX-NEXT: [[L:%.*]] = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> [[A0]], <4 x float> splat (float 0x3F8DE8D040000000), <4 x float> splat (float 0xBFB3715EE0000000))
-; AVX-NEXT: [[H:%.*]] = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> [[A1]], <4 x float> splat (float 0x3F8DE8D040000000), <4 x float> splat (float 0xBFB3715EE0000000))
-; AVX-NEXT: [[RES:%.*]] = shufflevector <4 x float> [[L]], <4 x float> [[H]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; AVX-NEXT: ret <8 x float> [[RES]]
+; CHECK-LABEL: define <8 x float> @concat_fma_const_chain(
+; CHECK-SAME: <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A0]], <4 x float> [[A1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.fma.v8f32(<8 x float> [[TMP1]], <8 x float> splat (float 0x3F8DE8D040000000), <8 x float> splat (float 0xBFB3715EE0000000))
+; CHECK-NEXT: ret <8 x float> [[RES]]
;
%l = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> splat (float 0x3F8DE8D040000000), <4 x float> splat (float 0xBFB3715EE0000000))
%h = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> %a1, <4 x float> splat (float 0x3F8DE8D040000000), <4 x float> splat (float 0xBFB3715EE0000000))
@@ -35,12 +28,18 @@ define <8 x float> @concat_fma_const_chain(<4 x float> %a0, <4 x float> %a1) {
}
define <8 x float> @interleave_fma_const_chain(<4 x float> %a0, <4 x float> %a1) {
-; CHECK-LABEL: define <8 x float> @interleave_fma_const_chain(
-; CHECK-SAME: <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[L:%.*]] = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> [[A0]], <4 x float> splat (float 0x3F8DE8D040000000), <4 x float> splat (float 0xBFB3715EE0000000))
-; CHECK-NEXT: [[H:%.*]] = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> [[A1]], <4 x float> splat (float 0x3F8DE8D040000000), <4 x float> splat (float 0xBFB3715EE0000000))
-; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x float> [[L]], <4 x float> [[H]], <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
-; CHECK-NEXT: ret <8 x float> [[RES]]
+; SSE-LABEL: define <8 x float> @interleave_fma_const_chain(
+; SSE-SAME: <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) #[[ATTR0]] {
+; SSE-NEXT: [[L:%.*]] = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> [[A0]], <4 x float> splat (float 0x3F8DE8D040000000), <4 x float> splat (float 0xBFB3715EE0000000))
+; SSE-NEXT: [[H:%.*]] = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> [[A1]], <4 x float> splat (float 0x3F8DE8D040000000), <4 x float> splat (float 0xBFB3715EE0000000))
+; SSE-NEXT: [[RES:%.*]] = shufflevector <4 x float> [[L]], <4 x float> [[H]], <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
+; SSE-NEXT: ret <8 x float> [[RES]]
+;
+; AVX-LABEL: define <8 x float> @interleave_fma_const_chain(
+; AVX-SAME: <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) #[[ATTR0]] {
+; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A0]], <4 x float> [[A1]], <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
+; AVX-NEXT: [[RES:%.*]] = call <8 x float> @llvm.fma.v8f32(<8 x float> [[TMP1]], <8 x float> splat (float 0x3F8DE8D040000000), <8 x float> splat (float 0xBFB3715EE0000000))
+; AVX-NEXT: ret <8 x float> [[RES]]
;
%l = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> splat (float 0x3F8DE8D040000000), <4 x float> splat (float 0xBFB3715EE0000000))
%h = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> %a1, <4 x float> splat (float 0x3F8DE8D040000000), <4 x float> splat (float 0xBFB3715EE0000000))
``````````
</details>
https://github.com/llvm/llvm-project/pull/170465
More information about the llvm-commits mailing list