[llvm] 7b82873 - [LV] Add tests with multiple store groups re-using widened ops.
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 10 09:11:15 PDT 2025
Author: Florian Hahn
Date: 2025-09-10T17:10:46+01:00
New Revision: 7b828738c692ba64a48b4f4f6763064d58ef0618
URL: https://github.com/llvm/llvm-project/commit/7b828738c692ba64a48b4f4f6763064d58ef0618
DIFF: https://github.com/llvm/llvm-project/commit/7b828738c692ba64a48b4f4f6763064d58ef0618.diff
LOG: [LV] Add tests with multiple store groups re-using widened ops.
Test coverage for https://github.com/llvm/llvm-project/issues/156190.
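For context, the narrow-interleave-to-widen-memory transform rewrites an interleave group whose members all apply the same widened operation into a single contiguous wide memory operation, and the vector loop then advances one group per iteration instead of VF groups. A minimal sketch of the intended rewrite for the { double, double } groups used in the tests below (the names here are illustrative, not taken from the patch):

; Interleaved form at VF 2: one wide load covers two structs and is
; de-interleaved into its two members before the per-member fadds.
%wide.vec = load <4 x double>, ptr %gep.src
%member.0 = shufflevector <4 x double> %wide.vec, <4 x double> poison, <2 x i32> <i32 0, i32 2>
%member.1 = shufflevector <4 x double> %wide.vec, <4 x double> poison, <2 x i32> <i32 1, i32 3>
%res.0 = fadd contract <2 x double> %member.0, splat (double 2.000000e+01)
%res.1 = fadd contract <2 x double> %member.1, splat (double 2.000000e+01)
; ... %res.0 and %res.1 are re-interleaved and stored as <4 x double> ...

; Narrowed form: both members perform the same operation, so the
; shuffles are dropped and the group becomes one wide contiguous op
; (the induction step then drops from 2 to 1):
%wide.load = load <2 x double>, ptr %gep.src
%res = fadd contract <2 x double> %wide.load, splat (double 2.000000e+01)
store <2 x double> %res, ptr %gep.dst

The new tests stress this transform when several store groups re-use the same widened operands.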
Added:
Modified:
llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-with-wide-ops.ll
llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory.ll
Removed:
################################################################################
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-with-wide-ops.ll b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-with-wide-ops.ll
index ab975a6b9ffcf..687222942dc1b 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-with-wide-ops.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-with-wide-ops.ll
@@ -1168,3 +1168,84 @@ loop:
exit:
ret void
}
+
+; FIXME: Currently incorrectly narrows the load,
+; https://github.com/llvm/llvm-project/issues/156190.
+define void @multiple_store_groups_storing_same_wide_bin_op(ptr noalias %A, ptr noalias %B, ptr noalias %C) {
+; VF2-LABEL: define void @multiple_store_groups_storing_same_wide_bin_op(
+; VF2-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) {
+; VF2-NEXT: [[ENTRY:.*:]]
+; VF2-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; VF2: [[VECTOR_PH]]:
+; VF2-NEXT: br label %[[VECTOR_BODY:.*]]
+; VF2: [[VECTOR_BODY]]:
+; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; VF2-NEXT: [[TMP0:%.*]] = getelementptr { double, double }, ptr [[A]], i64 [[INDEX]]
+; VF2-NEXT: [[TMP1:%.*]] = load double, ptr [[TMP0]], align 8
+; VF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP1]], i64 0
+; VF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer
+; VF2-NEXT: [[TMP2:%.*]] = fadd contract <2 x double> [[BROADCAST_SPLAT]], splat (double 2.000000e+01)
+; VF2-NEXT: [[TMP3:%.*]] = getelementptr { double, double }, ptr [[B]], i64 [[INDEX]]
+; VF2-NEXT: store <2 x double> [[TMP2]], ptr [[TMP3]], align 8
+; VF2-NEXT: [[TMP4:%.*]] = getelementptr { double, double }, ptr [[C]], i64 [[INDEX]]
+; VF2-NEXT: store <2 x double> [[TMP2]], ptr [[TMP4]], align 8
+; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 1
+; VF2-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
+; VF2-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
+; VF2: [[MIDDLE_BLOCK]]:
+; VF2-NEXT: br [[EXIT:label %.*]]
+; VF2: [[SCALAR_PH]]:
+;
+; VF4-LABEL: define void @multiple_store_groups_storing_same_wide_bin_op(
+; VF4-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) {
+; VF4-NEXT: [[ENTRY:.*:]]
+; VF4-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; VF4: [[VECTOR_PH]]:
+; VF4-NEXT: br label %[[VECTOR_BODY:.*]]
+; VF4: [[VECTOR_BODY]]:
+; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; VF4-NEXT: [[TMP0:%.*]] = getelementptr { double, double }, ptr [[A]], i64 [[INDEX]]
+; VF4-NEXT: [[WIDE_VEC:%.*]] = load <8 x double>, ptr [[TMP0]], align 8
+; VF4-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x double> [[WIDE_VEC]], <8 x double> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; VF4-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <8 x double> [[WIDE_VEC]], <8 x double> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; VF4-NEXT: [[TMP1:%.*]] = fadd contract <4 x double> [[STRIDED_VEC]], splat (double 2.000000e+01)
+; VF4-NEXT: [[TMP2:%.*]] = fadd contract <4 x double> [[STRIDED_VEC1]], splat (double 2.000000e+01)
+; VF4-NEXT: [[TMP3:%.*]] = getelementptr { double, double }, ptr [[B]], i64 [[INDEX]]
+; VF4-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; VF4-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x double> [[TMP4]], <8 x double> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
+; VF4-NEXT: store <8 x double> [[INTERLEAVED_VEC]], ptr [[TMP3]], align 8
+; VF4-NEXT: [[TMP5:%.*]] = getelementptr { double, double }, ptr [[C]], i64 [[INDEX]]
+; VF4-NEXT: store <8 x double> [[INTERLEAVED_VEC]], ptr [[TMP5]], align 8
+; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; VF4-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
+; VF4-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
+; VF4: [[MIDDLE_BLOCK]]:
+; VF4-NEXT: br [[EXIT:label %.*]]
+; VF4: [[SCALAR_PH]]:
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %gep.A = getelementptr { double, double }, ptr %A, i64 %iv
+ %l.A.0 = load double, ptr %gep.A, align 8
+ %gep.A.1 = getelementptr inbounds nuw i8, ptr %gep.A, i64 8
+ %l.A.1 = load double, ptr %gep.A.1, align 8
+ %add.0 = fadd contract double %l.A.0, 20.0
+ %add.1 = fadd contract double %l.A.1, 20.0
+ %gep.B = getelementptr { double, double }, ptr %B, i64 %iv
+ store double %add.0, ptr %gep.B, align 8
+ %gep.B.1 = getelementptr inbounds nuw i8, ptr %gep.B, i64 8
+ store double %add.1, ptr %gep.B.1, align 8
+ %gep.C = getelementptr { double, double }, ptr %C, i64 %iv
+ %gep.C.1 = getelementptr inbounds nuw i8, ptr %gep.C, i64 8
+ store double %add.0, ptr %gep.C, align 8
+ store double %add.1, ptr %gep.C.1, align 8
+ %iv.next = add nuw nsw i64 %iv, 1
+ %.not = icmp eq i64 %iv.next, 1000
+ br i1 %.not, label %exit, label %loop
+
+exit:
+ ret void
+}
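Note on the FIXME above: in the VF2 output the { double, double } load group has been narrowed all the way to a scalar load of the first member, which is then broadcast, so both stored lanes receive %l.A.0 + 20.0 and the value loaded via %gep.A.1 is lost. The VF4 output keeps the full interleave groups and remains correct.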
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory.ll b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory.ll
index 41b89dc0a4df6..98763a2c92379 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory.ll
@@ -585,3 +585,76 @@ loop:
exit:
ret void
}
+
+define void @multiple_store_groups_storing_same_load_group(ptr noalias %A, ptr noalias %B, ptr noalias %C) {
+; VF2-LABEL: define void @multiple_store_groups_storing_same_load_group(
+; VF2-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) {
+; VF2-NEXT: [[ENTRY:.*:]]
+; VF2-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; VF2: [[VECTOR_PH]]:
+; VF2-NEXT: br label %[[VECTOR_BODY:.*]]
+; VF2: [[VECTOR_BODY]]:
+; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; VF2-NEXT: [[TMP0:%.*]] = getelementptr { double, double }, ptr [[A]], i64 [[INDEX]]
+; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP0]], align 8
+; VF2-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x double>, ptr [[TMP0]], align 8
+; VF2-NEXT: [[TMP1:%.*]] = getelementptr { double, double }, ptr [[B]], i64 [[INDEX]]
+; VF2-NEXT: store <2 x double> [[WIDE_LOAD]], ptr [[TMP1]], align 8
+; VF2-NEXT: [[TMP2:%.*]] = getelementptr { double, double }, ptr [[C]], i64 [[INDEX]]
+; VF2-NEXT: store <2 x double> [[WIDE_LOAD1]], ptr [[TMP2]], align 8
+; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 1
+; VF2-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
+; VF2-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
+; VF2: [[MIDDLE_BLOCK]]:
+; VF2-NEXT: br [[EXIT:label %.*]]
+; VF2: [[SCALAR_PH]]:
+;
+; VF4-LABEL: define void @multiple_store_groups_storing_same_load_group(
+; VF4-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) {
+; VF4-NEXT: [[ENTRY:.*:]]
+; VF4-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; VF4: [[VECTOR_PH]]:
+; VF4-NEXT: br label %[[VECTOR_BODY:.*]]
+; VF4: [[VECTOR_BODY]]:
+; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; VF4-NEXT: [[TMP0:%.*]] = getelementptr { double, double }, ptr [[A]], i64 [[INDEX]]
+; VF4-NEXT: [[WIDE_VEC:%.*]] = load <8 x double>, ptr [[TMP0]], align 8
+; VF4-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x double> [[WIDE_VEC]], <8 x double> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; VF4-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <8 x double> [[WIDE_VEC]], <8 x double> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; VF4-NEXT: [[TMP1:%.*]] = getelementptr { double, double }, ptr [[B]], i64 [[INDEX]]
+; VF4-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[STRIDED_VEC]], <4 x double> [[STRIDED_VEC1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; VF4-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x double> [[TMP2]], <8 x double> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
+; VF4-NEXT: store <8 x double> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 8
+; VF4-NEXT: [[TMP3:%.*]] = getelementptr { double, double }, ptr [[C]], i64 [[INDEX]]
+; VF4-NEXT: store <8 x double> [[INTERLEAVED_VEC]], ptr [[TMP3]], align 8
+; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; VF4-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
+; VF4-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
+; VF4: [[MIDDLE_BLOCK]]:
+; VF4-NEXT: br [[EXIT:label %.*]]
+; VF4: [[SCALAR_PH]]:
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %gep.A = getelementptr { double, double }, ptr %A, i64 %iv
+ %gep.A.1 = getelementptr inbounds nuw i8, ptr %gep.A, i64 8
+ %l.A.0 = load double, ptr %gep.A, align 8
+ %l.A.1 = load double, ptr %gep.A.1, align 8
+ %gep.B = getelementptr { double, double }, ptr %B, i64 %iv
+ %gep.B.1 = getelementptr inbounds nuw i8, ptr %gep.B, i64 8
+ store double %l.A.0, ptr %gep.B, align 8
+ store double %l.A.1, ptr %gep.B.1, align 8
+ %gep.C = getelementptr { double, double }, ptr %C, i64 %iv
+ %gep.C.1 = getelementptr inbounds nuw i8, ptr %gep.C, i64 8
+ store double %l.A.0, ptr %gep.C, align 8
+ store double %l.A.1, ptr %gep.C.1, align 8
+ %iv.next = add nuw nsw i64 %iv, 1
+ %.not = icmp eq i64 %iv.next, 1000
+ br i1 %.not, label %exit, label %loop
+
+exit:
+ ret void
+}