[llvm] d176c8d - [VectorCombine] foldShuffleOfBinops - add test showing failure to recognise that the new shuffle is repeated (so only a single cost) (#172708)

Wed Dec 17 10:56:22 PST 2025

Author: Simon Pilgrim
Date: 2025-12-17T18:56:18Z
New Revision: d176c8d20f2215c3c62ae468639544cfe3444751

URL: https://github.com/llvm/llvm-project/commit/d176c8d20f2215c3c62ae468639544cfe3444751
DIFF: https://github.com/llvm/llvm-project/commit/d176c8d20f2215c3c62ae468639544cfe3444751.diff

LOG: [VectorCombine] foldShuffleOfBinops - add test showing failure to recognise that the new shuffle is repeated (so only a single cost) (#172708)

Similar to #170867

Added: 
    

Modified: 
    llvm/test/Transforms/VectorCombine/X86/shuffle-of-binops.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-binops.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-binops.ll
index 77b44d0e40e14..8c66684dfe137 100644

--- a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-binops.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-binops.ll
@@ -59,6 +59,29 @@ define <4 x float> @shuf_fmul_v4f32_xx_swap(<4 x float> %x, <4 x float> %y, <4 x
   ret <4 x float> %r
 }
 
+; FIXME: For repeated ops (e.g. fmul %a0, %a0) the new operand shuffles are identical - don't repeat the shuffle cost.
+
+define <8 x float> @shuf_fmul_v4f32_self_mul(<4 x float> %a0, <4 x float> %a1) {
+; SSE-LABEL: define <8 x float> @shuf_fmul_v4f32_self_mul(
+; SSE-SAME: <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) #[[ATTR0]] {
+; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[A0]], <4 x float> [[A1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <4 x float> [[A0]], <4 x float> [[A1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; SSE-NEXT:    [[R:%.*]] = fmul <8 x float> [[TMP1]], [[TMP2]]
+; SSE-NEXT:    ret <8 x float> [[R]]
+;
+; AVX-LABEL: define <8 x float> @shuf_fmul_v4f32_self_mul(
+; AVX-SAME: <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) #[[ATTR0]] {
+; AVX-NEXT:    [[M0:%.*]] = fmul <4 x float> [[A0]], [[A0]]
+; AVX-NEXT:    [[M1:%.*]] = fmul <4 x float> [[A1]], [[A1]]
+; AVX-NEXT:    [[R:%.*]] = shufflevector <4 x float> [[M0]], <4 x float> [[M1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; AVX-NEXT:    ret <8 x float> [[R]]
+;
+  %m0 = fmul <4 x float> %a0, %a0
+  %m1 = fmul <4 x float> %a1, %a1
+  %r = shufflevector <4 x float> %m0, <4 x float> %m1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x float> %r
+}
+
 ; For commutative instructions, common operand may be swapped.
 
 define <2 x i64> @shuf_and_v2i64_yy_swap(<2 x i64> %x, <2 x i64> %y, <2 x i64> %z) {