[llvm] 1aff96b - [InstCombine] Add extra tests for preserving load metadata.

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Sat Nov 9 04:24:30 PST 2024


Author: Florian Hahn
Date: 2024-11-09T12:24:01Z
New Revision: 1aff96b3dfcc58d62fda5b1452a8029f1a737cc2

URL: https://github.com/llvm/llvm-project/commit/1aff96b3dfcc58d62fda5b1452a8029f1a737cc2
DIFF: https://github.com/llvm/llvm-project/commit/1aff96b3dfcc58d62fda5b1452a8029f1a737cc2.diff

LOG: [InstCombine] Add extra tests for preserving load metadata.

Test cases for https://github.com/llvm/llvm-project/issues/115595.

Added: 
    llvm/test/Transforms/PhaseOrdering/X86/preserve-access-group.ll

Modified: 
    llvm/test/Transforms/InstCombine/loadstore-metadata.ll

Removed: 
    


################################################################################
diff --git a/llvm/test/Transforms/InstCombine/loadstore-metadata.ll b/llvm/test/Transforms/InstCombine/loadstore-metadata.ll
index 247a02f0bcc14a..60546c30fd8ad1 100644
--- a/llvm/test/Transforms/InstCombine/loadstore-metadata.ll
+++ b/llvm/test/Transforms/InstCombine/loadstore-metadata.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals smart
 ; RUN: opt -passes=instcombine -S < %s | FileCheck %s
 
 target datalayout = "e-m:e-p:64:64:64-i64:64-f80:128-n8:16:32:64-S128"
@@ -20,7 +20,7 @@ define i32 @test_load_cast_combine_noalias(ptr %ptr) {
 ; Ensure (cast (load (...))) -> (load (cast (...))) preserves no-alias metadata.
 ; CHECK-LABEL: @test_load_cast_combine_noalias(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[L1:%.*]] = load i32, ptr [[PTR:%.*]], align 4, !alias.scope !3, !noalias !3
+; CHECK-NEXT:    [[L1:%.*]] = load i32, ptr [[PTR:%.*]], align 4, !alias.scope [[META3:![0-9]+]], !noalias [[META3]]
 ; CHECK-NEXT:    ret i32 [[L1]]
 ;
 entry:
@@ -48,7 +48,7 @@ define i32 @test_load_cast_combine_invariant(ptr %ptr) {
 ; Ensure (cast (load (...))) -> (load (cast (...))) preserves invariant metadata.
 ; CHECK-LABEL: @test_load_cast_combine_invariant(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[L1:%.*]] = load i32, ptr [[PTR:%.*]], align 4, !invariant.load !6
+; CHECK-NEXT:    [[L1:%.*]] = load i32, ptr [[PTR:%.*]], align 4, !invariant.load [[META6:![0-9]+]]
 ; CHECK-NEXT:    ret i32 [[L1]]
 ;
 entry:
@@ -62,7 +62,7 @@ define i32 @test_load_cast_combine_nontemporal(ptr %ptr) {
 ; metadata.
 ; CHECK-LABEL: @test_load_cast_combine_nontemporal(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[L1:%.*]] = load i32, ptr [[PTR:%.*]], align 4, !nontemporal !7
+; CHECK-NEXT:    [[L1:%.*]] = load i32, ptr [[PTR:%.*]], align 4, !nontemporal [[META7:![0-9]+]]
 ; CHECK-NEXT:    ret i32 [[L1]]
 ;
 entry:
@@ -76,7 +76,7 @@ define ptr @test_load_cast_combine_align(ptr %ptr) {
 ; metadata.
 ; CHECK-LABEL: @test_load_cast_combine_align(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[L:%.*]] = load ptr, ptr [[PTR:%.*]], align 8, !align !8
+; CHECK-NEXT:    [[L:%.*]] = load ptr, ptr [[PTR:%.*]], align 8, !align [[META8:![0-9]+]]
 ; CHECK-NEXT:    ret ptr [[L]]
 ;
 entry:
@@ -89,7 +89,7 @@ define ptr @test_load_cast_combine_deref(ptr %ptr) {
 ; metadata.
 ; CHECK-LABEL: @test_load_cast_combine_deref(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[L:%.*]] = load ptr, ptr [[PTR:%.*]], align 8, !dereferenceable !8
+; CHECK-NEXT:    [[L:%.*]] = load ptr, ptr [[PTR:%.*]], align 8, !dereferenceable [[META8]]
 ; CHECK-NEXT:    ret ptr [[L]]
 ;
 entry:
@@ -102,7 +102,7 @@ define ptr @test_load_cast_combine_deref_or_null(ptr %ptr) {
 ; dereferenceable_or_null metadata.
 ; CHECK-LABEL: @test_load_cast_combine_deref_or_null(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[L:%.*]] = load ptr, ptr [[PTR:%.*]], align 8, !dereferenceable_or_null !8
+; CHECK-NEXT:    [[L:%.*]] = load ptr, ptr [[PTR:%.*]], align 8, !dereferenceable_or_null [[META8]]
 ; CHECK-NEXT:    ret ptr [[L]]
 ;
 entry:
@@ -151,7 +151,7 @@ exit:
 define void @test_load_cast_combine_nonnull(ptr %ptr) {
 ; CHECK-LABEL: @test_load_cast_combine_nonnull(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[P:%.*]] = load ptr, ptr [[PTR:%.*]], align 8, !nonnull !6
+; CHECK-NEXT:    [[P:%.*]] = load ptr, ptr [[PTR:%.*]], align 8, !nonnull [[META6]]
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr [[PTR]], i64 336
 ; CHECK-NEXT:    store ptr [[P]], ptr [[GEP]], align 8
 ; CHECK-NEXT:    ret void
@@ -165,7 +165,7 @@ entry:
 
 define i32 @test_load_cast_combine_noundef(ptr %ptr) {
 ; CHECK-LABEL: @test_load_cast_combine_noundef(
-; CHECK-NEXT:    [[L1:%.*]] = load i32, ptr [[PTR:%.*]], align 4, !noundef !6
+; CHECK-NEXT:    [[L1:%.*]] = load i32, ptr [[PTR:%.*]], align 4, !noundef [[META6]]
 ; CHECK-NEXT:    ret i32 [[L1]]
 ;
   %l = load float, ptr %ptr, !noundef !{}
@@ -186,6 +186,81 @@ entry:
   ret i32 %c
 }
 
+; FIXME: Should preserve metadata on loads.
+define double @preserve_load_metadata_after_select_transform(ptr %a, ptr %b) {
+; CHECK-LABEL: @preserve_load_metadata_after_select_transform(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[L_A:%.*]] = load double, ptr [[A:%.*]], align 8
+; CHECK-NEXT:    [[L_B:%.*]] = load double, ptr [[B:%.*]], align 8
+; CHECK-NEXT:    [[CMP_I:%.*]] = fcmp fast olt double [[L_A]], [[L_B]]
+; CHECK-NEXT:    [[L_SEL:%.*]] = select i1 [[CMP_I]], double [[L_B]], double [[L_A]]
+; CHECK-NEXT:    ret double [[L_SEL]]
+;
+entry:
+  %l.a = load double, ptr %a, align 8, !tbaa !0, !llvm.access.group !7
+  %l.b = load double, ptr %b, align 8, !tbaa !0, !llvm.access.group !7
+  %cmp.i = fcmp fast olt double %l.a, %l.b
+  %ptr.sel = select i1 %cmp.i, ptr %b, ptr %a
+  %l.sel = load double, ptr %ptr.sel, align 8, !tbaa !0, !llvm.access.group !7
+  ret double %l.sel
+}
+
+; FIXME: Should preserve metadata on loads.
+define double @preserve_load_metadata_after_select_transform_metadata_missing_1(ptr %a, ptr %b) {
+; CHECK-LABEL: @preserve_load_metadata_after_select_transform_metadata_missing_1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[L_A:%.*]] = load double, ptr [[A:%.*]], align 8
+; CHECK-NEXT:    [[L_B:%.*]] = load double, ptr [[B:%.*]], align 8
+; CHECK-NEXT:    [[CMP_I:%.*]] = fcmp fast olt double [[L_A]], [[L_B]]
+; CHECK-NEXT:    [[L_SEL:%.*]] = select i1 [[CMP_I]], double [[L_B]], double [[L_A]]
+; CHECK-NEXT:    ret double [[L_SEL]]
+;
+entry:
+  %l.a = load double, ptr %a, align 8, !llvm.access.group !7
+  %l.b = load double, ptr %b, align 8, !tbaa !0, !llvm.access.group !7
+  %cmp.i = fcmp fast olt double %l.a, %l.b
+  %ptr.sel = select i1 %cmp.i, ptr %b, ptr %a
+  %l.sel = load double, ptr %ptr.sel, align 8, !tbaa !0, !llvm.access.group !7
+  ret double %l.sel
+}
+
+define double @preserve_load_metadata_after_select_transform_metadata_missing_2(ptr %a, ptr %b) {
+; CHECK-LABEL: @preserve_load_metadata_after_select_transform_metadata_missing_2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[L_A:%.*]] = load double, ptr [[A:%.*]], align 8
+; CHECK-NEXT:    [[L_B:%.*]] = load double, ptr [[B:%.*]], align 8
+; CHECK-NEXT:    [[CMP_I:%.*]] = fcmp fast olt double [[L_A]], [[L_B]]
+; CHECK-NEXT:    [[L_SEL:%.*]] = select i1 [[CMP_I]], double [[L_B]], double [[L_A]]
+; CHECK-NEXT:    ret double [[L_SEL]]
+;
+entry:
+  %l.a = load double, ptr %a, align 8, !llvm.access.group !7
+  %l.b = load double, ptr %b, align 8, !llvm.access.group !7
+  %cmp.i = fcmp fast olt double %l.a, %l.b
+  %ptr.sel = select i1 %cmp.i, ptr %b, ptr %a
+  %l.sel = load double, ptr %ptr.sel, align 8, !tbaa !0, !llvm.access.group !12
+  ret double %l.sel
+}
+
+; FIXME: Should preserve metadata on loads.
+define double @preserve_load_metadata_after_select_transform_metadata_missing_3(ptr %a, ptr %b) {
+; CHECK-LABEL: @preserve_load_metadata_after_select_transform_metadata_missing_3(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[L_A:%.*]] = load double, ptr [[A:%.*]], align 8
+; CHECK-NEXT:    [[L_B:%.*]] = load double, ptr [[B:%.*]], align 8
+; CHECK-NEXT:    [[CMP_I:%.*]] = fcmp fast olt double [[L_A]], [[L_B]]
+; CHECK-NEXT:    [[L_SEL:%.*]] = select i1 [[CMP_I]], double [[L_B]], double [[L_A]]
+; CHECK-NEXT:    ret double [[L_SEL]]
+;
+entry:
+  %l.a = load double, ptr %a, align 8, !tbaa !0, !llvm.access.group !7
+  %l.b = load double, ptr %b, align 8, !tbaa !0, !llvm.access.group !7
+  %cmp.i = fcmp fast olt double %l.a, %l.b
+  %ptr.sel = select i1 %cmp.i, ptr %b, ptr %a
+  %l.sel = load double, ptr %ptr.sel, align 8, !tbaa !0, !llvm.access.group !12
+  ret double %l.sel
+}
+
 !0 = !{!1, !1, i64 0}
 !1 = !{!"scalar type", !2}
 !2 = !{!"root"}
@@ -198,3 +273,16 @@ entry:
 !9 = !{i64 8}
 !10 = distinct !{}
 !11 = !{i32 5, i32 6}
+!12 = !{}
+;.
+; CHECK: [[TBAA0]] = !{[[LOOP1]], [[LOOP1]], i64 0}
+; CHECK: [[LOOP1]] = !{!"scalar type", [[META2:![0-9]+]]}
+; CHECK: [[META2]] = !{!"root"}
+; CHECK: [[META3]] = !{[[META4:![0-9]+]]}
+; CHECK: [[META4]] = distinct !{[[META4]], [[META5:![0-9]+]]}
+; CHECK: [[META5]] = distinct !{[[META5]]}
+; CHECK: [[META6]] = !{}
+; CHECK: [[META7]] = !{i32 1}
+; CHECK: [[META8]] = !{i64 8}
+; CHECK: [[ACC_GRP9]] = distinct !{}
+;.

diff --git a/llvm/test/Transforms/PhaseOrdering/X86/preserve-access-group.ll b/llvm/test/Transforms/PhaseOrdering/X86/preserve-access-group.ll
new file mode 100644
index 00000000000000..816ed6e831153b
--- /dev/null
+++ b/llvm/test/Transforms/PhaseOrdering/X86/preserve-access-group.ll
@@ -0,0 +1,251 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes='default<O3>' -S %s | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; FIXME: !llvm.access.group should be preserved, loop should be vectorized.
+; End-to-end test for https://github.com/llvm/llvm-project/issues/115595.
+define void @test(i32 noundef %nface, i32 noundef %ncell, ptr noalias noundef %face_cell, ptr noalias noundef %x, ptr noalias noundef %y) #0 {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: i32 noundef [[NFACE:%.*]], i32 noundef [[NCELL:%.*]], ptr noalias nocapture noundef readonly [[FACE_CELL:%.*]], ptr noalias nocapture noundef readonly [[X:%.*]], ptr noalias nocapture noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[CMP8:%.*]] = icmp sgt i32 [[NFACE]], 0
+; CHECK-NEXT:    br i1 [[CMP8]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_COND_CLEANUP:.*]]
+; CHECK:       [[FOR_BODY_PREHEADER]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = zext nneg i32 [[NFACE]] to i64
+; CHECK-NEXT:    [[INVARIANT_GEP:%.*]] = getelementptr inbounds i32, ptr [[FACE_CELL]], i64 [[TMP0]]
+; CHECK-NEXT:    [[XTRAITER:%.*]] = and i64 [[TMP0]], 3
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[NFACE]], 4
+; CHECK-NEXT:    br i1 [[TMP1]], label %[[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA:.*]], label %[[FOR_BODY_PREHEADER_NEW:.*]]
+; CHECK:       [[FOR_BODY_PREHEADER_NEW]]:
+; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = and i64 [[TMP0]], 2147483644
+; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
+; CHECK:       [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]]:
+; CHECK-NEXT:    [[INDVARS_IV_UNR:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT_3:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT:    [[LCMP_MOD_NOT:%.*]] = icmp eq i64 [[XTRAITER]], 0
+; CHECK-NEXT:    br i1 [[LCMP_MOD_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY_EPIL:.*]]
+; CHECK:       [[FOR_BODY_EPIL]]:
+; CHECK-NEXT:    [[INDVARS_IV_EPIL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_EPIL:%.*]], %[[FOR_BODY_EPIL]] ], [ [[INDVARS_IV_UNR]], %[[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]] ]
+; CHECK-NEXT:    [[EPIL_ITER:%.*]] = phi i64 [ [[EPIL_ITER_NEXT:%.*]], %[[FOR_BODY_EPIL]] ], [ 0, %[[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]] ]
+; CHECK-NEXT:    [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i32, ptr [[FACE_CELL]], i64 [[INDVARS_IV_EPIL]]
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX_EPIL]], align 4, !tbaa [[TBAA0:![0-9]+]], !llvm.access.group [[ACC_GRP4:![0-9]+]]
+; CHECK-NEXT:    [[GEP_EPIL:%.*]] = getelementptr inbounds i32, ptr [[INVARIANT_GEP]], i64 [[INDVARS_IV_EPIL]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[GEP_EPIL]], align 4, !tbaa [[TBAA0]], !llvm.access.group [[ACC_GRP4]]
+; CHECK-NEXT:    [[IDXPROM3_EPIL:%.*]] = sext i32 [[TMP2]] to i64
+; CHECK-NEXT:    [[ARRAYIDX4_EPIL:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[IDXPROM3_EPIL]]
+; CHECK-NEXT:    [[IDXPROM5_EPIL:%.*]] = sext i32 [[TMP3]] to i64
+; CHECK-NEXT:    [[ARRAYIDX6_EPIL:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[IDXPROM5_EPIL]]
+; CHECK-NEXT:    [[TMP4:%.*]] = load double, ptr [[ARRAYIDX4_EPIL]], align 8
+; CHECK-NEXT:    [[TMP5:%.*]] = load double, ptr [[ARRAYIDX6_EPIL]], align 8
+; CHECK-NEXT:    [[CMP_I_EPIL:%.*]] = fcmp fast olt double [[TMP4]], [[TMP5]]
+; CHECK-NEXT:    [[TMP6:%.*]] = select i1 [[CMP_I_EPIL]], double [[TMP5]], double [[TMP4]]
+; CHECK-NEXT:    store double [[TMP6]], ptr [[ARRAYIDX4_EPIL]], align 8, !tbaa [[TBAA5:![0-9]+]], !llvm.access.group [[ACC_GRP4]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_EPIL]] = add nuw nsw i64 [[INDVARS_IV_EPIL]], 1
+; CHECK-NEXT:    [[EPIL_ITER_NEXT]] = add i64 [[EPIL_ITER]], 1
+; CHECK-NEXT:    [[EPIL_ITER_CMP_NOT:%.*]] = icmp eq i64 [[EPIL_ITER_NEXT]], [[XTRAITER]]
+; CHECK-NEXT:    br i1 [[EPIL_ITER_CMP_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY_EPIL]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK:       [[FOR_COND_CLEANUP]]:
+; CHECK-NEXT:    ret void
+; CHECK:       [[FOR_BODY]]:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER_NEW]] ], [ [[INDVARS_IV_NEXT_3]], %[[FOR_BODY]] ]
+; CHECK-NEXT:    [[NITER:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER_NEW]] ], [ [[NITER_NEXT_3:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[FACE_CELL]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA0]], !llvm.access.group [[ACC_GRP4]]
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i32, ptr [[INVARIANT_GEP]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr [[GEP]], align 4, !tbaa [[TBAA0]], !llvm.access.group [[ACC_GRP4]]
+; CHECK-NEXT:    [[IDXPROM3:%.*]] = sext i32 [[TMP7]] to i64
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[IDXPROM3]]
+; CHECK-NEXT:    [[IDXPROM5:%.*]] = sext i32 [[TMP8]] to i64
+; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[IDXPROM5]]
+; CHECK-NEXT:    [[TMP9:%.*]] = load double, ptr [[ARRAYIDX4]], align 8
+; CHECK-NEXT:    [[TMP10:%.*]] = load double, ptr [[ARRAYIDX6]], align 8
+; CHECK-NEXT:    [[CMP_I:%.*]] = fcmp fast olt double [[TMP9]], [[TMP10]]
+; CHECK-NEXT:    [[TMP11:%.*]] = select i1 [[CMP_I]], double [[TMP10]], double [[TMP9]]
+; CHECK-NEXT:    store double [[TMP11]], ptr [[ARRAYIDX4]], align 8, !tbaa [[TBAA5]], !llvm.access.group [[ACC_GRP4]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT:%.*]] = or disjoint i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, ptr [[FACE_CELL]], i64 [[INDVARS_IV_NEXT]]
+; CHECK-NEXT:    [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX_1]], align 4, !tbaa [[TBAA0]], !llvm.access.group [[ACC_GRP4]]
+; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i32, ptr [[INVARIANT_GEP]], i64 [[INDVARS_IV_NEXT]]
+; CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[GEP_1]], align 4, !tbaa [[TBAA0]], !llvm.access.group [[ACC_GRP4]]
+; CHECK-NEXT:    [[IDXPROM3_1:%.*]] = sext i32 [[TMP12]] to i64
+; CHECK-NEXT:    [[ARRAYIDX4_1:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[IDXPROM3_1]]
+; CHECK-NEXT:    [[IDXPROM5_1:%.*]] = sext i32 [[TMP13]] to i64
+; CHECK-NEXT:    [[ARRAYIDX6_1:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[IDXPROM5_1]]
+; CHECK-NEXT:    [[TMP14:%.*]] = load double, ptr [[ARRAYIDX4_1]], align 8
+; CHECK-NEXT:    [[TMP15:%.*]] = load double, ptr [[ARRAYIDX6_1]], align 8
+; CHECK-NEXT:    [[CMP_I_1:%.*]] = fcmp fast olt double [[TMP14]], [[TMP15]]
+; CHECK-NEXT:    [[TMP16:%.*]] = select i1 [[CMP_I_1]], double [[TMP15]], double [[TMP14]]
+; CHECK-NEXT:    store double [[TMP16]], ptr [[ARRAYIDX4_1]], align 8, !tbaa [[TBAA5]], !llvm.access.group [[ACC_GRP4]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_1:%.*]] = or disjoint i64 [[INDVARS_IV]], 2
+; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, ptr [[FACE_CELL]], i64 [[INDVARS_IV_NEXT_1]]
+; CHECK-NEXT:    [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX_2]], align 4, !tbaa [[TBAA0]], !llvm.access.group [[ACC_GRP4]]
+; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr inbounds i32, ptr [[INVARIANT_GEP]], i64 [[INDVARS_IV_NEXT_1]]
+; CHECK-NEXT:    [[TMP18:%.*]] = load i32, ptr [[GEP_2]], align 4, !tbaa [[TBAA0]], !llvm.access.group [[ACC_GRP4]]
+; CHECK-NEXT:    [[IDXPROM3_2:%.*]] = sext i32 [[TMP17]] to i64
+; CHECK-NEXT:    [[ARRAYIDX4_2:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[IDXPROM3_2]]
+; CHECK-NEXT:    [[IDXPROM5_2:%.*]] = sext i32 [[TMP18]] to i64
+; CHECK-NEXT:    [[ARRAYIDX6_2:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[IDXPROM5_2]]
+; CHECK-NEXT:    [[TMP19:%.*]] = load double, ptr [[ARRAYIDX4_2]], align 8
+; CHECK-NEXT:    [[TMP20:%.*]] = load double, ptr [[ARRAYIDX6_2]], align 8
+; CHECK-NEXT:    [[CMP_I_2:%.*]] = fcmp fast olt double [[TMP19]], [[TMP20]]
+; CHECK-NEXT:    [[TMP21:%.*]] = select i1 [[CMP_I_2]], double [[TMP20]], double [[TMP19]]
+; CHECK-NEXT:    store double [[TMP21]], ptr [[ARRAYIDX4_2]], align 8, !tbaa [[TBAA5]], !llvm.access.group [[ACC_GRP4]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_2:%.*]] = or disjoint i64 [[INDVARS_IV]], 3
+; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, ptr [[FACE_CELL]], i64 [[INDVARS_IV_NEXT_2]]
+; CHECK-NEXT:    [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX_3]], align 4, !tbaa [[TBAA0]], !llvm.access.group [[ACC_GRP4]]
+; CHECK-NEXT:    [[GEP_3:%.*]] = getelementptr inbounds i32, ptr [[INVARIANT_GEP]], i64 [[INDVARS_IV_NEXT_2]]
+; CHECK-NEXT:    [[TMP23:%.*]] = load i32, ptr [[GEP_3]], align 4, !tbaa [[TBAA0]], !llvm.access.group [[ACC_GRP4]]
+; CHECK-NEXT:    [[IDXPROM3_3:%.*]] = sext i32 [[TMP22]] to i64
+; CHECK-NEXT:    [[ARRAYIDX4_3:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[IDXPROM3_3]]
+; CHECK-NEXT:    [[IDXPROM5_3:%.*]] = sext i32 [[TMP23]] to i64
+; CHECK-NEXT:    [[ARRAYIDX6_3:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[IDXPROM5_3]]
+; CHECK-NEXT:    [[TMP24:%.*]] = load double, ptr [[ARRAYIDX4_3]], align 8
+; CHECK-NEXT:    [[TMP25:%.*]] = load double, ptr [[ARRAYIDX6_3]], align 8
+; CHECK-NEXT:    [[CMP_I_3:%.*]] = fcmp fast olt double [[TMP24]], [[TMP25]]
+; CHECK-NEXT:    [[TMP26:%.*]] = select i1 [[CMP_I_3]], double [[TMP25]], double [[TMP24]]
+; CHECK-NEXT:    store double [[TMP26]], ptr [[ARRAYIDX4_3]], align 8, !tbaa [[TBAA5]], !llvm.access.group [[ACC_GRP4]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_3]] = add nuw nsw i64 [[INDVARS_IV]], 4
+; CHECK-NEXT:    [[NITER_NEXT_3]] = add i64 [[NITER]], 4
+; CHECK-NEXT:    [[NITER_NCMP_3:%.*]] = icmp eq i64 [[NITER_NEXT_3]], [[UNROLL_ITER]]
+; CHECK-NEXT:    br i1 [[NITER_NCMP_3]], label %[[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]], label %[[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
+;
+entry:
+  %nface.addr = alloca i32, align 4
+  %ncell.addr = alloca i32, align 4
+  %face_cell.addr = alloca ptr, align 8
+  %x.addr = alloca ptr, align 8
+  %y.addr = alloca ptr, align 8
+  %il = alloca i32, align 4
+  %ir = alloca i32, align 4
+  %iface = alloca i32, align 4
+  store i32 %nface, ptr %nface.addr, align 4, !tbaa !6
+  store i32 %ncell, ptr %ncell.addr, align 4, !tbaa !6
+  store ptr %face_cell, ptr %face_cell.addr, align 8, !tbaa !10
+  store ptr %x, ptr %x.addr, align 8, !tbaa !10
+  store ptr %y, ptr %y.addr, align 8, !tbaa !10
+  call void @llvm.lifetime.start.p0(i64 4, ptr %il) #3
+  call void @llvm.lifetime.start.p0(i64 4, ptr %ir) #3
+  call void @llvm.lifetime.start.p0(i64 4, ptr %iface) #3
+  store i32 0, ptr %iface, align 4, !tbaa !6
+  br label %for.cond
+
+for.cond:
+  %0 = load i32, ptr %iface, align 4, !tbaa !6, !llvm.access.group !12
+  %1 = load i32, ptr %nface.addr, align 4, !tbaa !6, !llvm.access.group !12
+  %cmp = icmp slt i32 %0, %1
+  br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:
+  call void @llvm.lifetime.end.p0(i64 4, ptr %iface) #3, !llvm.access.group !12
+  br label %for.end
+
+for.body:
+  %2 = load ptr, ptr %face_cell.addr, align 8, !tbaa !10, !llvm.access.group !12
+  %3 = load i32, ptr %iface, align 4, !tbaa !6, !llvm.access.group !12
+  %idxprom = sext i32 %3 to i64
+  %arrayidx = getelementptr inbounds i32, ptr %2, i64 %idxprom
+  %4 = load i32, ptr %arrayidx, align 4, !tbaa !6, !llvm.access.group !12
+  store i32 %4, ptr %il, align 4, !tbaa !6, !llvm.access.group !12
+  %5 = load ptr, ptr %face_cell.addr, align 8, !tbaa !10, !llvm.access.group !12
+  %6 = load i32, ptr %iface, align 4, !tbaa !6, !llvm.access.group !12
+  %7 = load i32, ptr %nface.addr, align 4, !tbaa !6, !llvm.access.group !12
+  %add = add nsw i32 %6, %7
+  %idxprom1 = sext i32 %add to i64
+  %arrayidx2 = getelementptr inbounds i32, ptr %5, i64 %idxprom1
+  %8 = load i32, ptr %arrayidx2, align 4, !tbaa !6, !llvm.access.group !12
+  store i32 %8, ptr %ir, align 4, !tbaa !6, !llvm.access.group !12
+  %9 = load ptr, ptr %y.addr, align 8, !tbaa !10, !llvm.access.group !12
+  %10 = load i32, ptr %il, align 4, !tbaa !6, !llvm.access.group !12
+  %idxprom3 = sext i32 %10 to i64
+  %arrayidx4 = getelementptr inbounds double, ptr %9, i64 %idxprom3
+  %11 = load ptr, ptr %x.addr, align 8, !tbaa !10, !llvm.access.group !12
+  %12 = load i32, ptr %ir, align 4, !tbaa !6, !llvm.access.group !12
+  %idxprom5 = sext i32 %12 to i64
+  %arrayidx6 = getelementptr inbounds double, ptr %11, i64 %idxprom5
+  %call = call noundef nonnull align 8 dereferenceable(8) ptr @max(ptr noundef nonnull align 8 dereferenceable(8) %arrayidx4, ptr noundef nonnull align 8 dereferenceable(8) %arrayidx6), !llvm.access.group !12
+  %13 = load double, ptr %call, align 8, !tbaa !13, !llvm.access.group !12
+  %14 = load ptr, ptr %y.addr, align 8, !tbaa !10, !llvm.access.group !12
+  %15 = load i32, ptr %il, align 4, !tbaa !6, !llvm.access.group !12
+  %idxprom7 = sext i32 %15 to i64
+  %arrayidx8 = getelementptr inbounds double, ptr %14, i64 %idxprom7
+  store double %13, ptr %arrayidx8, align 8, !tbaa !13, !llvm.access.group !12
+  br label %for.inc
+
+for.inc:
+  %16 = load i32, ptr %iface, align 4, !tbaa !6, !llvm.access.group !12
+  %inc = add nsw i32 %16, 1
+  store i32 %inc, ptr %iface, align 4, !tbaa !6, !llvm.access.group !12
+  br label %for.cond, !llvm.loop !15
+
+for.end:
+  call void @llvm.lifetime.end.p0(i64 4, ptr %ir) #3
+  call void @llvm.lifetime.end.p0(i64 4, ptr %il) #3
+  ret void
+}
+
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1
+
+define linkonce_odr noundef nonnull align 8 dereferenceable(8) ptr @max(ptr noundef nonnull align 8 dereferenceable(8) %__a, ptr noundef nonnull align 8 dereferenceable(8) %__b) #2 {
+entry:
+  %retval = alloca ptr, align 8
+  %__a.addr = alloca ptr, align 8
+  %__b.addr = alloca ptr, align 8
+  store ptr %__a, ptr %__a.addr, align 8, !tbaa !10
+  store ptr %__b, ptr %__b.addr, align 8, !tbaa !10
+  %0 = load ptr, ptr %__a.addr, align 8, !tbaa !10
+  %1 = load double, ptr %0, align 8, !tbaa !13
+  %2 = load ptr, ptr %__b.addr, align 8, !tbaa !10
+  %3 = load double, ptr %2, align 8, !tbaa !13
+  %cmp = fcmp fast olt double %1, %3
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  %4 = load ptr, ptr %__b.addr, align 8, !tbaa !10
+  store ptr %4, ptr %retval, align 8
+  br label %return
+
+if.end:
+  %5 = load ptr, ptr %__a.addr, align 8, !tbaa !10
+  store ptr %5, ptr %retval, align 8
+  br label %return
+
+return:
+  %6 = load ptr, ptr %retval, align 8
+  ret ptr %6
+}
+
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1
+
+attributes #0 = { mustprogress "target-cpu" = "skylake-avx512" }
+attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
+
+!6 = !{!7, !7, i64 0}
+!7 = !{!"int", !8, i64 0}
+!8 = !{!"omnipotent char", !9, i64 0}
+!9 = !{!"Simple C++ TBAA"}
+!10 = !{!11, !11, i64 0}
+!11 = !{!"any pointer", !8, i64 0}
+!12 = distinct !{}
+!13 = !{!14, !14, i64 0}
+!14 = !{!"double", !8, i64 0}
+!15 = distinct !{!15, !16, !17, !18}
+!16 = !{!"llvm.loop.mustprogress"}
+!17 = !{!"llvm.loop.parallel_accesses", !12}
+!18 = !{!"llvm.loop.vectorize.enable", i1 true}
+
+;.
+; CHECK: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; CHECK: [[META1]] = !{!"int", [[META2:![0-9]+]], i64 0}
+; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0}
+; CHECK: [[META3]] = !{!"Simple C++ TBAA"}
+; CHECK: [[ACC_GRP4]] = distinct !{}
+; CHECK: [[TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0}
+; CHECK: [[META6]] = !{!"double", [[META2]], i64 0}
+; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META8:![0-9]+]]}
+; CHECK: [[META8]] = !{!"llvm.loop.unroll.disable"}
+; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META10:![0-9]+]], [[META11:![0-9]+]], [[META12:![0-9]+]]}
+; CHECK: [[META10]] = !{!"llvm.loop.mustprogress"}
+; CHECK: [[META11]] = !{!"llvm.loop.parallel_accesses", [[ACC_GRP4]]}
+; CHECK: [[META12]] = !{!"llvm.loop.vectorize.enable", i1 true}
+;.


        


More information about the llvm-commits mailing list