[llvm] a976843 - [AArch64] Add a phase-ordering test for an mla reduction sum. NFC
David Green via llvm-commits
llvm-commits at lists.llvm.org
Sun Aug 10 07:40:16 PDT 2025
Author: David Green
Date: 2025-08-10T15:40:12+01:00
New Revision: a976843033485ff44bb4bbb0b0b8a537956b4c40
URL: https://github.com/llvm/llvm-project/commit/a976843033485ff44bb4bbb0b0b8a537956b4c40
DIFF: https://github.com/llvm/llvm-project/commit/a976843033485ff44bb4bbb0b0b8a537956b4c40.diff
LOG: [AArch64] Add a phase-ordering test for an mla reduction sum. NFC
Added:
llvm/test/Transforms/PhaseOrdering/AArch64/reduce_muladd.ll
llvm/test/Transforms/PhaseOrdering/AArch64/reduce_submuladd.ll
Modified:
Removed:
################################################################################
diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/reduce_muladd.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/reduce_muladd.ll
new file mode 100644
index 0000000000000..ca78ff756ef55
--- /dev/null
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/reduce_muladd.ll
@@ -0,0 +1,92 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -S -O3 < %s | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
+target triple = "aarch64"
+
+; This function (a 16-element reduction of a[i] * b[i]) should be vectorized successfully.
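+;
+; A plausible source for this kernel, shown for readability. This is a
+; hypothetical reconstruction (the commit does not include the original
+; source), but it matches the trip count of 16 and the fast-math function
+; attributes below:
+;
+;   float vmlaq(const float *a, const float *b) {
+;     float sum = 0.0f;
+;     for (int i = 0; i < 16; i++)
+;       sum += a[i] * b[i];   // multiply-accumulate: an mla/fmla candidate
+;     return sum;
+;   }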
+
+define dso_local nofpclass(nan inf) float @vmlaq(ptr noundef %0, ptr noundef %1) #0 {
+; CHECK-LABEL: define dso_local nofpclass(nan inf) float @vmlaq
+; CHECK-SAME: (ptr noundef readonly captures(none) [[TMP0:%.*]], ptr noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[TMP3:%.*]] = load <16 x float>, ptr [[TMP0]], align 4, !tbaa [[TBAA4:![0-9]+]]
+; CHECK-NEXT: [[TMP4:%.*]] = load <16 x float>, ptr [[TMP1]], align 4, !tbaa [[TBAA4]]
+; CHECK-NEXT: [[TMP5:%.*]] = fmul fast <16 x float> [[TMP4]], [[TMP3]]
+; CHECK-NEXT: [[TMP6:%.*]] = tail call fast float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> [[TMP5]])
+; CHECK-NEXT: ret float [[TMP6]]
+;
+ %3 = alloca ptr, align 8
+ %4 = alloca ptr, align 8
+ %5 = alloca float, align 4
+ %6 = alloca i32, align 4
+ store ptr %0, ptr %3, align 8, !tbaa !4
+ store ptr %1, ptr %4, align 8, !tbaa !4
+ call void @llvm.lifetime.start.p0(ptr %5) #2
+ store float 0.000000e+00, ptr %5, align 4, !tbaa !9
+ call void @llvm.lifetime.start.p0(ptr %6) #2
+ store i32 0, ptr %6, align 4, !tbaa !11
+ br label %7
+
+7: ; preds = %25, %2
+ %8 = load i32, ptr %6, align 4, !tbaa !11
+ %9 = icmp slt i32 %8, 16
+ br i1 %9, label %11, label %10
+
+10: ; preds = %7
+ call void @llvm.lifetime.end.p0(ptr %6) #2
+ br label %28
+
+11: ; preds = %7
+ %12 = load ptr, ptr %3, align 8, !tbaa !4
+ %13 = load i32, ptr %6, align 4, !tbaa !11
+ %14 = sext i32 %13 to i64
+ %15 = getelementptr inbounds float, ptr %12, i64 %14
+ %16 = load float, ptr %15, align 4, !tbaa !9
+ %17 = load ptr, ptr %4, align 8, !tbaa !4
+ %18 = load i32, ptr %6, align 4, !tbaa !11
+ %19 = sext i32 %18 to i64
+ %20 = getelementptr inbounds float, ptr %17, i64 %19
+ %21 = load float, ptr %20, align 4, !tbaa !9
+ %22 = fmul fast float %16, %21
+ %23 = load float, ptr %5, align 4, !tbaa !9
+ %24 = fadd fast float %23, %22
+ store float %24, ptr %5, align 4, !tbaa !9
+ br label %25
+
+25: ; preds = %11
+ %26 = load i32, ptr %6, align 4, !tbaa !11
+ %27 = add nsw i32 %26, 1
+ store i32 %27, ptr %6, align 4, !tbaa !11
+ br label %7, !llvm.loop !13
+
+28: ; preds = %10
+ %29 = load float, ptr %5, align 4, !tbaa !9
+ call void @llvm.lifetime.end.p0(ptr %5) #2
+ ret float %29
+}
+
+declare void @llvm.lifetime.start.p0(ptr captures(none)) #1
+declare void @llvm.lifetime.end.p0(ptr captures(none)) #1
+
+attributes #0 = { nounwind uwtable "approx-func-fp-math"="true" "frame-pointer"="non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+fp-armv8,+neon,+v8a,-fmv" "unsafe-fp-math"="true" }
+attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
+attributes #2 = { nounwind }
+
+!llvm.module.flags = !{!0, !1, !2}
+!llvm.ident = !{!3}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"uwtable", i32 2}
+!2 = !{i32 7, !"frame-pointer", i32 1}
+!3 = !{!"clang version 22.0.0git"}
+!4 = !{!5, !5, i64 0}
+!5 = !{!"p1 float", !6, i64 0}
+!6 = !{!"any pointer", !7, i64 0}
+!7 = !{!"omnipotent char", !8, i64 0}
+!8 = !{!"Simple C/C++ TBAA"}
+!9 = !{!10, !10, i64 0}
+!10 = !{!"float", !7, i64 0}
+!11 = !{!12, !12, i64 0}
+!12 = !{!"int", !7, i64 0}
+!13 = distinct !{!13, !14}
+!14 = !{!"llvm.loop.mustprogress"}
diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/reduce_submuladd.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/reduce_submuladd.ll
new file mode 100644
index 0000000000000..fb958b822503a
--- /dev/null
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/reduce_submuladd.ll
@@ -0,0 +1,274 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -S -O3 < %s | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
+target triple = "aarch64"
+
+; This function (a more complex nested reduction of (a[i] - b[i]) * (a[i] - b[i]) over strided rows) should be vectorized successfully.
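+;
+; A plausible source for this kernel, shown for readability. This is a
+; hypothetical reconstruction inferred from the mangled names (test and an
+; internal reduce<7>) and the loop bounds below, not code from the commit:
+;
+;   template <int Rows>
+;   static float reduce(const float *a, const float *b, int strideA, int strideB) {
+;     float sum = 0.0f;
+;     for (int i = 0; i < Rows; i++) {    // 7 outer iterations
+;       float rowSum = 0.0f;
+;       for (int k = 0; k < 21; k++) {    // 21-element inner reduction
+;         float d = a[k] - b[k];
+;         rowSum += d * d;                // squared difference, accumulated
+;       }
+;       a += strideA;                     // advance both rows by their strides
+;       b += strideB;
+;       sum += rowSum;
+;     }
+;     return sum;
+;   }
+;
+;   float test(const float *a, const float *b, int strideA, int strideB) {
+;     return reduce<7>(a, b, strideA, strideB);
+;   }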
+
+define dso_local noundef nofpclass(nan inf) float @_Z4testPKfS0_ii(ptr noundef %0, ptr noundef %1, i32 noundef %2, i32 noundef %3) #0 {
+; CHECK-LABEL: define dso_local noundef nofpclass(nan inf) float @_Z4testPKfS0_ii
+; CHECK-SAME: (ptr noundef readonly captures(none) [[TMP0:%.*]], ptr noundef readonly captures(none) [[TMP1:%.*]], i32 noundef [[TMP2:%.*]], i32 noundef [[TMP3:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: .preheader.i:
+; CHECK-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = sext i32 [[TMP2]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = load <20 x float>, ptr [[TMP0]], align 4, !tbaa [[TBAA4:![0-9]+]]
+; CHECK-NEXT: [[TMP7:%.*]] = load <20 x float>, ptr [[TMP1]], align 4, !tbaa [[TBAA4]]
+; CHECK-NEXT: [[TMP8:%.*]] = fsub fast <20 x float> [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = fmul fast <20 x float> [[TMP8]], [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 80
+; CHECK-NEXT: [[TMP11:%.*]] = load float, ptr [[TMP10]], align 4, !tbaa [[TBAA4]]
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 80
+; CHECK-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP12]], align 4, !tbaa [[TBAA4]]
+; CHECK-NEXT: [[TMP14:%.*]] = fsub fast float [[TMP11]], [[TMP13]]
+; CHECK-NEXT: [[TMP15:%.*]] = fmul fast float [[TMP14]], [[TMP14]]
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i64 [[TMP5]]
+; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 [[TMP4]]
+; CHECK-NEXT: [[OP_RDX:%.*]] = tail call fast float @llvm.vector.reduce.fadd.v20f32(float [[TMP15]], <20 x float> [[TMP9]])
+; CHECK-NEXT: [[TMP18:%.*]] = load <20 x float>, ptr [[TMP16]], align 4, !tbaa [[TBAA4]]
+; CHECK-NEXT: [[TMP19:%.*]] = load <20 x float>, ptr [[TMP17]], align 4, !tbaa [[TBAA4]]
+; CHECK-NEXT: [[TMP20:%.*]] = fsub fast <20 x float> [[TMP18]], [[TMP19]]
+; CHECK-NEXT: [[TMP21:%.*]] = fmul fast <20 x float> [[TMP20]], [[TMP20]]
+; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP16]], i64 80
+; CHECK-NEXT: [[TMP23:%.*]] = load float, ptr [[TMP22]], align 4, !tbaa [[TBAA4]]
+; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP17]], i64 80
+; CHECK-NEXT: [[TMP25:%.*]] = load float, ptr [[TMP24]], align 4, !tbaa [[TBAA4]]
+; CHECK-NEXT: [[TMP26:%.*]] = fsub fast float [[TMP23]], [[TMP25]]
+; CHECK-NEXT: [[TMP27:%.*]] = fmul fast float [[TMP26]], [[TMP26]]
+; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[TMP5]]
+; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP4]]
+; CHECK-NEXT: [[OP_RDX_1:%.*]] = tail call fast float @llvm.vector.reduce.fadd.v20f32(float [[TMP27]], <20 x float> [[TMP21]])
+; CHECK-NEXT: [[OP_RDX3_1:%.*]] = fadd fast float [[OP_RDX_1]], [[OP_RDX]]
+; CHECK-NEXT: [[TMP30:%.*]] = load <20 x float>, ptr [[TMP28]], align 4, !tbaa [[TBAA4]]
+; CHECK-NEXT: [[TMP31:%.*]] = load <20 x float>, ptr [[TMP29]], align 4, !tbaa [[TBAA4]]
+; CHECK-NEXT: [[TMP32:%.*]] = fsub fast <20 x float> [[TMP30]], [[TMP31]]
+; CHECK-NEXT: [[TMP33:%.*]] = fmul fast <20 x float> [[TMP32]], [[TMP32]]
+; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP28]], i64 80
+; CHECK-NEXT: [[TMP35:%.*]] = load float, ptr [[TMP34]], align 4, !tbaa [[TBAA4]]
+; CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP29]], i64 80
+; CHECK-NEXT: [[TMP37:%.*]] = load float, ptr [[TMP36]], align 4, !tbaa [[TBAA4]]
+; CHECK-NEXT: [[TMP38:%.*]] = fsub fast float [[TMP35]], [[TMP37]]
+; CHECK-NEXT: [[TMP39:%.*]] = fmul fast float [[TMP38]], [[TMP38]]
+; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i64 [[TMP5]]
+; CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds float, ptr [[TMP29]], i64 [[TMP4]]
+; CHECK-NEXT: [[OP_RDX_2:%.*]] = tail call fast float @llvm.vector.reduce.fadd.v20f32(float [[TMP39]], <20 x float> [[TMP33]])
+; CHECK-NEXT: [[OP_RDX3_2:%.*]] = fadd fast float [[OP_RDX_2]], [[OP_RDX3_1]]
+; CHECK-NEXT: [[TMP42:%.*]] = load <20 x float>, ptr [[TMP40]], align 4, !tbaa [[TBAA4]]
+; CHECK-NEXT: [[TMP43:%.*]] = load <20 x float>, ptr [[TMP41]], align 4, !tbaa [[TBAA4]]
+; CHECK-NEXT: [[TMP44:%.*]] = fsub fast <20 x float> [[TMP42]], [[TMP43]]
+; CHECK-NEXT: [[TMP45:%.*]] = fmul fast <20 x float> [[TMP44]], [[TMP44]]
+; CHECK-NEXT: [[TMP46:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP40]], i64 80
+; CHECK-NEXT: [[TMP47:%.*]] = load float, ptr [[TMP46]], align 4, !tbaa [[TBAA4]]
+; CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP41]], i64 80
+; CHECK-NEXT: [[TMP49:%.*]] = load float, ptr [[TMP48]], align 4, !tbaa [[TBAA4]]
+; CHECK-NEXT: [[TMP50:%.*]] = fsub fast float [[TMP47]], [[TMP49]]
+; CHECK-NEXT: [[TMP51:%.*]] = fmul fast float [[TMP50]], [[TMP50]]
+; CHECK-NEXT: [[TMP52:%.*]] = getelementptr inbounds float, ptr [[TMP40]], i64 [[TMP5]]
+; CHECK-NEXT: [[TMP53:%.*]] = getelementptr inbounds float, ptr [[TMP41]], i64 [[TMP4]]
+; CHECK-NEXT: [[OP_RDX_3:%.*]] = tail call fast float @llvm.vector.reduce.fadd.v20f32(float [[TMP51]], <20 x float> [[TMP45]])
+; CHECK-NEXT: [[OP_RDX3_3:%.*]] = fadd fast float [[OP_RDX_3]], [[OP_RDX3_2]]
+; CHECK-NEXT: [[TMP54:%.*]] = load <20 x float>, ptr [[TMP52]], align 4, !tbaa [[TBAA4]]
+; CHECK-NEXT: [[TMP55:%.*]] = load <20 x float>, ptr [[TMP53]], align 4, !tbaa [[TBAA4]]
+; CHECK-NEXT: [[TMP56:%.*]] = fsub fast <20 x float> [[TMP54]], [[TMP55]]
+; CHECK-NEXT: [[TMP57:%.*]] = fmul fast <20 x float> [[TMP56]], [[TMP56]]
+; CHECK-NEXT: [[TMP58:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP52]], i64 80
+; CHECK-NEXT: [[TMP59:%.*]] = load float, ptr [[TMP58]], align 4, !tbaa [[TBAA4]]
+; CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP53]], i64 80
+; CHECK-NEXT: [[TMP61:%.*]] = load float, ptr [[TMP60]], align 4, !tbaa [[TBAA4]]
+; CHECK-NEXT: [[TMP62:%.*]] = fsub fast float [[TMP59]], [[TMP61]]
+; CHECK-NEXT: [[TMP63:%.*]] = fmul fast float [[TMP62]], [[TMP62]]
+; CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds float, ptr [[TMP52]], i64 [[TMP5]]
+; CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds float, ptr [[TMP53]], i64 [[TMP4]]
+; CHECK-NEXT: [[OP_RDX_4:%.*]] = tail call fast float @llvm.vector.reduce.fadd.v20f32(float [[TMP63]], <20 x float> [[TMP57]])
+; CHECK-NEXT: [[OP_RDX3_4:%.*]] = fadd fast float [[OP_RDX_4]], [[OP_RDX3_3]]
+; CHECK-NEXT: [[TMP66:%.*]] = load <20 x float>, ptr [[TMP64]], align 4, !tbaa [[TBAA4]]
+; CHECK-NEXT: [[TMP67:%.*]] = load <20 x float>, ptr [[TMP65]], align 4, !tbaa [[TBAA4]]
+; CHECK-NEXT: [[TMP68:%.*]] = fsub fast <20 x float> [[TMP66]], [[TMP67]]
+; CHECK-NEXT: [[TMP69:%.*]] = fmul fast <20 x float> [[TMP68]], [[TMP68]]
+; CHECK-NEXT: [[TMP70:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP64]], i64 80
+; CHECK-NEXT: [[TMP71:%.*]] = load float, ptr [[TMP70]], align 4, !tbaa [[TBAA4]]
+; CHECK-NEXT: [[TMP72:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP65]], i64 80
+; CHECK-NEXT: [[TMP73:%.*]] = load float, ptr [[TMP72]], align 4, !tbaa [[TBAA4]]
+; CHECK-NEXT: [[TMP74:%.*]] = fsub fast float [[TMP71]], [[TMP73]]
+; CHECK-NEXT: [[TMP75:%.*]] = fmul fast float [[TMP74]], [[TMP74]]
+; CHECK-NEXT: [[TMP76:%.*]] = getelementptr inbounds float, ptr [[TMP64]], i64 [[TMP5]]
+; CHECK-NEXT: [[TMP77:%.*]] = getelementptr inbounds float, ptr [[TMP65]], i64 [[TMP4]]
+; CHECK-NEXT: [[OP_RDX_5:%.*]] = tail call fast float @llvm.vector.reduce.fadd.v20f32(float [[TMP75]], <20 x float> [[TMP69]])
+; CHECK-NEXT: [[OP_RDX3_5:%.*]] = fadd fast float [[OP_RDX_5]], [[OP_RDX3_4]]
+; CHECK-NEXT: [[TMP78:%.*]] = load <20 x float>, ptr [[TMP76]], align 4, !tbaa [[TBAA4]]
+; CHECK-NEXT: [[TMP79:%.*]] = load <20 x float>, ptr [[TMP77]], align 4, !tbaa [[TBAA4]]
+; CHECK-NEXT: [[TMP80:%.*]] = fsub fast <20 x float> [[TMP78]], [[TMP79]]
+; CHECK-NEXT: [[TMP81:%.*]] = fmul fast <20 x float> [[TMP80]], [[TMP80]]
+; CHECK-NEXT: [[TMP82:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP76]], i64 80
+; CHECK-NEXT: [[TMP83:%.*]] = load float, ptr [[TMP82]], align 4, !tbaa [[TBAA4]]
+; CHECK-NEXT: [[TMP84:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP77]], i64 80
+; CHECK-NEXT: [[TMP85:%.*]] = load float, ptr [[TMP84]], align 4, !tbaa [[TBAA4]]
+; CHECK-NEXT: [[TMP86:%.*]] = fsub fast float [[TMP83]], [[TMP85]]
+; CHECK-NEXT: [[TMP87:%.*]] = fmul fast float [[TMP86]], [[TMP86]]
+; CHECK-NEXT: [[OP_RDX_6:%.*]] = tail call fast float @llvm.vector.reduce.fadd.v20f32(float [[TMP87]], <20 x float> [[TMP81]])
+; CHECK-NEXT: [[OP_RDX3_6:%.*]] = fadd fast float [[OP_RDX_6]], [[OP_RDX3_5]]
+; CHECK-NEXT: ret float [[OP_RDX3_6]]
+;
+ %5 = alloca ptr, align 8
+ %6 = alloca ptr, align 8
+ %7 = alloca i32, align 4
+ %8 = alloca i32, align 4
+ store ptr %0, ptr %5, align 8, !tbaa !4
+ store ptr %1, ptr %6, align 8, !tbaa !4
+ store i32 %2, ptr %7, align 4, !tbaa !9
+ store i32 %3, ptr %8, align 4, !tbaa !9
+ %9 = load ptr, ptr %5, align 8, !tbaa !4
+ %10 = load ptr, ptr %6, align 8, !tbaa !4
+ %11 = load i32, ptr %7, align 4, !tbaa !9
+ %12 = load i32, ptr %8, align 4, !tbaa !9
+ %13 = call fast noundef nofpclass(nan inf) float @_ZL6reduceILi7EEfPKfS1_ii(ptr noundef %9, ptr noundef %10, i32 noundef %11, i32 noundef %12)
+ ret float %13
+}
+
+define internal noundef nofpclass(nan inf) float @_ZL6reduceILi7EEfPKfS1_ii(ptr noundef %0, ptr noundef %1, i32 noundef %2, i32 noundef %3) #1 {
+ %5 = alloca ptr, align 8
+ %6 = alloca ptr, align 8
+ %7 = alloca i32, align 4
+ %8 = alloca i32, align 4
+ %9 = alloca i32, align 4
+ %10 = alloca i32, align 4
+ %11 = alloca i32, align 4
+ %12 = alloca float, align 4
+ %13 = alloca i32, align 4
+ %14 = alloca i32, align 4
+ %15 = alloca float, align 4
+ %16 = alloca i32, align 4
+ %17 = alloca float, align 4
+ store ptr %0, ptr %5, align 8, !tbaa !4
+ store ptr %1, ptr %6, align 8, !tbaa !4
+ store i32 %2, ptr %7, align 4, !tbaa !9
+ store i32 %3, ptr %8, align 4, !tbaa !9
+ call void @llvm.lifetime.start.p0(ptr %9) #3
+ store i32 3, ptr %9, align 4, !tbaa !9
+ call void @llvm.lifetime.start.p0(ptr %10) #3
+ store i32 3, ptr %10, align 4, !tbaa !9
+ call void @llvm.lifetime.start.p0(ptr %11) #3
+ store i32 7, ptr %11, align 4, !tbaa !9
+ call void @llvm.lifetime.start.p0(ptr %12) #3
+ store float 0.000000e+00, ptr %12, align 4, !tbaa !11
+ call void @llvm.lifetime.start.p0(ptr %13) #3
+ store i32 0, ptr %13, align 4, !tbaa !9
+ br label %18
+
+18: ; preds = %59, %4
+ %19 = load i32, ptr %13, align 4, !tbaa !9
+ %20 = icmp slt i32 %19, 7
+ br i1 %20, label %22, label %21
+
+21: ; preds = %18
+ store i32 2, ptr %14, align 4
+ call void @llvm.lifetime.end.p0(ptr %13) #3
+ br label %62
+
+22: ; preds = %18
+ call void @llvm.lifetime.start.p0(ptr %15) #3
+ store float 0.000000e+00, ptr %15, align 4, !tbaa !11
+ call void @llvm.lifetime.start.p0(ptr %16) #3
+ store i32 0, ptr %16, align 4, !tbaa !9
+ br label %23
+
+23: ; preds = %44, %22
+ %24 = load i32, ptr %16, align 4, !tbaa !9
+ %25 = icmp slt i32 %24, 21
+ br i1 %25, label %27, label %26
+
+26: ; preds = %23
+ store i32 5, ptr %14, align 4
+ call void @llvm.lifetime.end.p0(ptr %16) #3
+ br label %47
+
+27: ; preds = %23
+ call void @llvm.lifetime.start.p0(ptr %17) #3
+ %28 = load ptr, ptr %5, align 8, !tbaa !4
+ %29 = load i32, ptr %16, align 4, !tbaa !9
+ %30 = sext i32 %29 to i64
+ %31 = getelementptr inbounds float, ptr %28, i64 %30
+ %32 = load float, ptr %31, align 4, !tbaa !11
+ %33 = load ptr, ptr %6, align 8, !tbaa !4
+ %34 = load i32, ptr %16, align 4, !tbaa !9
+ %35 = sext i32 %34 to i64
+ %36 = getelementptr inbounds float, ptr %33, i64 %35
+ %37 = load float, ptr %36, align 4, !tbaa !11
+ %38 = fsub fast float %32, %37
+ store float %38, ptr %17, align 4, !tbaa !11
+ %39 = load float, ptr %17, align 4, !tbaa !11
+ %40 = load float, ptr %17, align 4, !tbaa !11
+ %41 = fmul fast float %39, %40
+ %42 = load float, ptr %15, align 4, !tbaa !11
+ %43 = fadd fast float %42, %41
+ store float %43, ptr %15, align 4, !tbaa !11
+ call void @llvm.lifetime.end.p0(ptr %17) #3
+ br label %44
+
+44: ; preds = %27
+ %45 = load i32, ptr %16, align 4, !tbaa !9
+ %46 = add nsw i32 %45, 1
+ store i32 %46, ptr %16, align 4, !tbaa !9
+ br label %23, !llvm.loop !13
+
+47: ; preds = %26
+ %48 = load i32, ptr %7, align 4, !tbaa !9
+ %49 = load ptr, ptr %5, align 8, !tbaa !4
+ %50 = sext i32 %48 to i64
+ %51 = getelementptr inbounds float, ptr %49, i64 %50
+ store ptr %51, ptr %5, align 8, !tbaa !4
+ %52 = load i32, ptr %8, align 4, !tbaa !9
+ %53 = load ptr, ptr %6, align 8, !tbaa !4
+ %54 = sext i32 %52 to i64
+ %55 = getelementptr inbounds float, ptr %53, i64 %54
+ store ptr %55, ptr %6, align 8, !tbaa !4
+ %56 = load float, ptr %15, align 4, !tbaa !11
+ %57 = load float, ptr %12, align 4, !tbaa !11
+ %58 = fadd fast float %57, %56
+ store float %58, ptr %12, align 4, !tbaa !11
+ call void @llvm.lifetime.end.p0(ptr %15) #3
+ br label %59
+
+59: ; preds = %47
+ %60 = load i32, ptr %13, align 4, !tbaa !9
+ %61 = add nsw i32 %60, 1
+ store i32 %61, ptr %13, align 4, !tbaa !9
+ br label %18, !llvm.loop !15
+
+62: ; preds = %21
+ %63 = load float, ptr %12, align 4, !tbaa !11
+ store i32 1, ptr %14, align 4
+ call void @llvm.lifetime.end.p0(ptr %12) #3
+ call void @llvm.lifetime.end.p0(ptr %11) #3
+ call void @llvm.lifetime.end.p0(ptr %10) #3
+ call void @llvm.lifetime.end.p0(ptr %9) #3
+ ret float %63
+}
+
+declare void @llvm.lifetime.start.p0(ptr captures(none)) #2
+declare void @llvm.lifetime.end.p0(ptr captures(none)) #2
+
+attributes #0 = { mustprogress uwtable "approx-func-fp-math"="true" "frame-pointer"="non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+fp-armv8,+neon,+v8a,-fmv" "unsafe-fp-math"="true" }
+attributes #1 = { inlinehint mustprogress nounwind uwtable "approx-func-fp-math"="true" "frame-pointer"="non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+fp-armv8,+neon,+v8a,-fmv" "unsafe-fp-math"="true" }
+attributes #2 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
+attributes #3 = { nounwind }
+
+!llvm.module.flags = !{!0, !1, !2}
+!llvm.ident = !{!3}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"uwtable", i32 2}
+!2 = !{i32 7, !"frame-pointer", i32 1}
+!3 = !{!"clang version 22.0.0git"}
+!4 = !{!5, !5, i64 0}
+!5 = !{!"p1 float", !6, i64 0}
+!6 = !{!"any pointer", !7, i64 0}
+!7 = !{!"omnipotent char", !8, i64 0}
+!8 = !{!"Simple C++ TBAA"}
+!9 = !{!10, !10, i64 0}
+!10 = !{!"int", !7, i64 0}
+!11 = !{!12, !12, i64 0}
+!12 = !{!"float", !7, i64 0}
+!13 = distinct !{!13, !14}
+!14 = !{!"llvm.loop.mustprogress"}
+!15 = distinct !{!15, !14}