[llvm] [AArch64] Tune unrolling prefs for more patterns on Apple CPUs (PR #149358)

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Thu Jul 31 02:20:58 PDT 2025


================
@@ -165,6 +165,126 @@ exit:
   ret void
 }
 
+define void @load_op_store_loop(ptr %src, ptr %dst, i64 %N, i64 %scale, float %k) {
+; APPLE-LABEL: define void @load_op_store_loop(
+; APPLE-SAME: ptr [[SRC:%.*]], ptr [[DST:%.*]], i64 [[N:%.*]], i64 [[SCALE:%.*]], float [[K:%.*]]) #[[ATTR0]] {
+; APPLE-NEXT:  [[ENTRY:.*]]:
+; APPLE-NEXT:    [[TMP0:%.*]] = add i64 [[N]], -1
+; APPLE-NEXT:    [[XTRAITER:%.*]] = and i64 [[N]], 1
+; APPLE-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 1
+; APPLE-NEXT:    br i1 [[TMP1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[ENTRY_NEW:.*]]
+; APPLE:       [[ENTRY_NEW]]:
+; APPLE-NEXT:    [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]]
+; APPLE-NEXT:    br label %[[LOOP:.*]]
+; APPLE:       [[LOOP]]:
+; APPLE-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[IV_NEXT_1:%.*]], %[[LOOP]] ]
+; APPLE-NEXT:    [[NITER:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[NITER_NEXT_1:%.*]], %[[LOOP]] ]
+; APPLE-NEXT:    [[SCALED_IV:%.*]] = mul nuw nsw i64 [[IV]], [[SCALE]]
+; APPLE-NEXT:    [[GEP_SRC:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV]]
+; APPLE-NEXT:    [[L:%.*]] = load float, ptr [[GEP_SRC]], align 4
+; APPLE-NEXT:    [[O:%.*]] = fadd float [[L]], [[K]]
+; APPLE-NEXT:    [[GEP_DST:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV]]
+; APPLE-NEXT:    store float [[O]], ptr [[GEP_DST]], align 4
+; APPLE-NEXT:    [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
+; APPLE-NEXT:    [[SCALED_IV_1:%.*]] = mul nuw nsw i64 [[IV_NEXT]], [[SCALE]]
+; APPLE-NEXT:    [[GEP_SRC_1:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV_1]]
+; APPLE-NEXT:    [[L_1:%.*]] = load float, ptr [[GEP_SRC_1]], align 4
+; APPLE-NEXT:    [[O_1:%.*]] = fadd float [[L_1]], [[K]]
+; APPLE-NEXT:    [[GEP_DST_1:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV_NEXT]]
+; APPLE-NEXT:    store float [[O_1]], ptr [[GEP_DST_1]], align 4
+; APPLE-NEXT:    [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2
+; APPLE-NEXT:    [[NITER_NEXT_1]] = add i64 [[NITER]], 2
+; APPLE-NEXT:    [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
+; APPLE-NEXT:    br i1 [[NITER_NCMP_1]], label %[[EXIT_UNR_LCSSA_LOOPEXIT:.*]], label %[[LOOP]]
+; APPLE:       [[EXIT_UNR_LCSSA_LOOPEXIT]]:
+; APPLE-NEXT:    [[IV_UNR_PH:%.*]] = phi i64 [ [[IV_NEXT_1]], %[[LOOP]] ]
+; APPLE-NEXT:    br label %[[EXIT_UNR_LCSSA]]
+; APPLE:       [[EXIT_UNR_LCSSA]]:
+; APPLE-NEXT:    [[IV_UNR:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ]
+; APPLE-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; APPLE-NEXT:    br i1 [[LCMP_MOD]], label %[[LOOP_EPIL_PREHEADER:.*]], label %[[EXIT:.*]]
+; APPLE:       [[LOOP_EPIL_PREHEADER]]:
+; APPLE-NEXT:    br label %[[LOOP_EPIL:.*]]
+; APPLE:       [[LOOP_EPIL]]:
+; APPLE-NEXT:    [[SCALED_IV_EPIL:%.*]] = mul nuw nsw i64 [[IV_UNR]], [[SCALE]]
+; APPLE-NEXT:    [[GEP_SRC_EPIL:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV_EPIL]]
+; APPLE-NEXT:    [[L_EPIL:%.*]] = load float, ptr [[GEP_SRC_EPIL]], align 4
+; APPLE-NEXT:    [[O_EPIL:%.*]] = fadd float [[L_EPIL]], [[K]]
+; APPLE-NEXT:    [[GEP_DST_EPIL:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV_UNR]]
+; APPLE-NEXT:    store float [[O_EPIL]], ptr [[GEP_DST_EPIL]], align 4
+; APPLE-NEXT:    br label %[[EXIT]]
+; APPLE:       [[EXIT]]:
+; APPLE-NEXT:    ret void
+;
+; OTHER-LABEL: define void @load_op_store_loop(
+; OTHER-SAME: ptr [[SRC:%.*]], ptr [[DST:%.*]], i64 [[N:%.*]], i64 [[SCALE:%.*]], float [[K:%.*]]) #[[ATTR0]] {
+; OTHER-NEXT:  [[ENTRY:.*]]:
+; OTHER-NEXT:    [[TMP0:%.*]] = add i64 [[N]], -1
+; OTHER-NEXT:    [[XTRAITER:%.*]] = and i64 [[N]], 1
+; OTHER-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 1
+; OTHER-NEXT:    br i1 [[TMP1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[ENTRY_NEW:.*]]
+; OTHER:       [[ENTRY_NEW]]:
+; OTHER-NEXT:    [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]]
+; OTHER-NEXT:    br label %[[LOOP:.*]]
+; OTHER:       [[LOOP]]:
+; OTHER-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[IV_NEXT_1:%.*]], %[[LOOP]] ]
+; OTHER-NEXT:    [[NITER:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[NITER_NEXT_1:%.*]], %[[LOOP]] ]
+; OTHER-NEXT:    [[SCALED_IV:%.*]] = mul nuw nsw i64 [[IV]], [[SCALE]]
+; OTHER-NEXT:    [[GEP_SRC:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV]]
+; OTHER-NEXT:    [[L:%.*]] = load float, ptr [[GEP_SRC]], align 4
+; OTHER-NEXT:    [[O:%.*]] = fadd float [[L]], [[K]]
+; OTHER-NEXT:    [[GEP_DST:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV]]
+; OTHER-NEXT:    store float [[O]], ptr [[GEP_DST]], align 4
+; OTHER-NEXT:    [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
+; OTHER-NEXT:    [[SCALED_IV_1:%.*]] = mul nuw nsw i64 [[IV_NEXT]], [[SCALE]]
+; OTHER-NEXT:    [[GEP_SRC_1:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV_1]]
+; OTHER-NEXT:    [[L_1:%.*]] = load float, ptr [[GEP_SRC_1]], align 4
+; OTHER-NEXT:    [[O_1:%.*]] = fadd float [[L_1]], [[K]]
+; OTHER-NEXT:    [[GEP_DST_1:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV_NEXT]]
+; OTHER-NEXT:    store float [[O_1]], ptr [[GEP_DST_1]], align 4
+; OTHER-NEXT:    [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2
+; OTHER-NEXT:    [[NITER_NEXT_1]] = add i64 [[NITER]], 2
+; OTHER-NEXT:    [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
+; OTHER-NEXT:    br i1 [[NITER_NCMP_1]], label %[[EXIT_UNR_LCSSA_LOOPEXIT:.*]], label %[[LOOP]]
+; OTHER:       [[EXIT_UNR_LCSSA_LOOPEXIT]]:
+; OTHER-NEXT:    [[IV_UNR_PH:%.*]] = phi i64 [ [[IV_NEXT_1]], %[[LOOP]] ]
+; OTHER-NEXT:    br label %[[EXIT_UNR_LCSSA]]
+; OTHER:       [[EXIT_UNR_LCSSA]]:
+; OTHER-NEXT:    [[IV_UNR:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ]
+; OTHER-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; OTHER-NEXT:    br i1 [[LCMP_MOD]], label %[[LOOP_EPIL_PREHEADER:.*]], label %[[EXIT:.*]]
+; OTHER:       [[LOOP_EPIL_PREHEADER]]:
+; OTHER-NEXT:    br label %[[LOOP_EPIL:.*]]
+; OTHER:       [[LOOP_EPIL]]:
+; OTHER-NEXT:    [[SCALED_IV_EPIL:%.*]] = mul nuw nsw i64 [[IV_UNR]], [[SCALE]]
+; OTHER-NEXT:    [[GEP_SRC_EPIL:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV_EPIL]]
+; OTHER-NEXT:    [[L_EPIL:%.*]] = load float, ptr [[GEP_SRC_EPIL]], align 4
+; OTHER-NEXT:    [[O_EPIL:%.*]] = fadd float [[L_EPIL]], [[K]]
+; OTHER-NEXT:    [[GEP_DST_EPIL:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV_UNR]]
+; OTHER-NEXT:    store float [[O_EPIL]], ptr [[GEP_DST_EPIL]], align 4
+; OTHER-NEXT:    br label %[[EXIT]]
+; OTHER:       [[EXIT]]:
+; OTHER-NEXT:    ret void
+;
+entry:
----------------
fhahn wrote:

can you add a test case with multiple blocks, where the users are in different blocks?
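
For example, something along these lines (just a rough sketch; the function name, block names, and exact structure are illustrative, not taken from the PR): a loop whose body branches on the loaded value, so the load's user sits in a different block than the load itself.

define void @multi_block_users(ptr %src, ptr %dst, i64 %N, float %k) {
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
  %gep.src = getelementptr inbounds float, ptr %src, i64 %iv
  %l = load float, ptr %gep.src, align 4
  %c = fcmp ogt float %l, 0.000000e+00
  br i1 %c, label %then, label %latch

then:
  ; %l is used here, in a different block than the load in %loop.
  %o = fadd float %l, %k
  %gep.dst = getelementptr inbounds float, ptr %dst, i64 %iv
  store float %o, ptr %gep.dst, align 4
  br label %latch

latch:
  %iv.next = add nuw nsw i64 %iv, 1
  %ec = icmp eq i64 %iv.next, %N
  br i1 %ec, label %exit, label %loop

exit:
  ret void
}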

https://github.com/llvm/llvm-project/pull/149358

