[clang] [llvm] [VectorCombine] Shrink loads used in shufflevector rebroadcasts (PR #128938)

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 26 11:53:56 PDT 2025


================
@@ -47,21 +47,12 @@ define <8 x i32> @concat_extract_subvectors_poison(<8 x i32> %x) {
 ; broadcast loads are free on AVX (and blends are much cheap than general 2-operand shuffles)
 
 define  <4 x double> @blend_broadcasts_v4f64(ptr %p0, ptr %p1)  {
-; SSE-LABEL: define <4 x double> @blend_broadcasts_v4f64(
-; SSE-SAME: ptr [[P0:%.*]], ptr [[P1:%.*]]) #[[ATTR0]] {
-; SSE-NEXT:    [[LD0:%.*]] = load <4 x double>, ptr [[P0]], align 32
-; SSE-NEXT:    [[LD1:%.*]] = load <4 x double>, ptr [[P1]], align 32
-; SSE-NEXT:    [[BLEND:%.*]] = shufflevector <4 x double> [[LD0]], <4 x double> [[LD1]], <4 x i32> <i32 0, i32 4, i32 4, i32 0>
-; SSE-NEXT:    ret <4 x double> [[BLEND]]
-;
-; AVX-LABEL: define <4 x double> @blend_broadcasts_v4f64(
-; AVX-SAME: ptr [[P0:%.*]], ptr [[P1:%.*]]) #[[ATTR0]] {
-; AVX-NEXT:    [[LD0:%.*]] = load <4 x double>, ptr [[P0]], align 32
-; AVX-NEXT:    [[LD1:%.*]] = load <4 x double>, ptr [[P1]], align 32
-; AVX-NEXT:    [[BCST0:%.*]] = shufflevector <4 x double> [[LD0]], <4 x double> undef, <4 x i32> zeroinitializer
-; AVX-NEXT:    [[BCST1:%.*]] = shufflevector <4 x double> [[LD1]], <4 x double> undef, <4 x i32> zeroinitializer
-; AVX-NEXT:    [[BLEND:%.*]] = shufflevector <4 x double> [[BCST0]], <4 x double> [[BCST1]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
-; AVX-NEXT:    ret <4 x double> [[BLEND]]
+; CHECK-LABEL: define <4 x double> @blend_broadcasts_v4f64(
+; CHECK-SAME: ptr [[P0:%.*]], ptr [[P1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load <1 x double>, ptr [[P0]], align 32
+; CHECK-NEXT:    [[TMP2:%.*]] = load <1 x double>, ptr [[P1]], align 32
+; CHECK-NEXT:    [[BLEND:%.*]] = shufflevector <1 x double> [[TMP1]], <1 x double> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 1, i32 0>
+; CHECK-NEXT:    ret <4 x double> [[BLEND]]
 ;
----------------
RKSimon wrote:

https://clang.godbolt.org/z/Kv7cT4PdP - this is a definite regression

https://github.com/llvm/llvm-project/pull/128938


More information about the llvm-commits mailing list