[llvm] [MachineLICM] Allow hoisting loads from invariant address (PR #70796)
David Sherwood via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 1 02:17:25 PDT 2023
================
@@ -0,0 +1,428 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc -mtriple=aarch64-linux-gnu < %s | FileCheck %s
+
+; Rough C equivalent, read off the IR below. Pointee types and integer
+; signedness are guesses, since the IR only shows opaque `ptr` and `i64`:
+;
+;   uint64_t one_dimensional(void **a, void *b, uint64_t N, uint64_t M, uint64_t K) {
+;     uint64_t sum = 0;
+;     for (uint64_t i = 0; i != N; i++)   // not entered when N == 0
+;       sum += (bcmp(a[i], b, 4) != 0);
+;     return sum;
+;   }
+;
+; %M and %K are not used by this function. The second bcmp operand (%b) is the
+; same address on every iteration. In the expected output no call to bcmp
+; appears; the checks match two 32-bit loads and a cmp instead, and the load
+; from %b (ldr w10, [x1]) is located inside the loop .LBB0_1.
+define i64 @one_dimensional(ptr %a, ptr %b, i64 %N, i64 %M, i64 %K) {
+; CHECK-LABEL: one_dimensional:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: cbz x2, .LBB0_2
+; CHECK-NEXT: .LBB0_1: // %for.body
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ldr x9, [x0], #8
+; CHECK-NEXT: ldr w10, [x1]
+; CHECK-NEXT: ldr w9, [x9]
+; CHECK-NEXT: cmp w9, w10
+; CHECK-NEXT: cinc x8, x8, ne
+; CHECK-NEXT: subs x2, x2, #1
+; CHECK-NEXT: b.ne .LBB0_1
+; CHECK-NEXT: .LBB0_2: // %for.cond.cleanup
+; CHECK-NEXT: mov x0, x8
+; CHECK-NEXT: ret
+entry:
+ %cmp4 = icmp eq i64 %N, 0 ; N == 0: bypass the loop and return 0
+ br i1 %cmp4, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup: ; preds = %for.body, %entry
+ %sum.0.lcssa = phi i64 [ 0, %entry ], [ %spec.select, %for.body ]
+ ret i64 %sum.0.lcssa
+
+for.body: ; preds = %entry, %for.body
+ %i.06 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
+ %sum.05 = phi i64 [ %spec.select, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds ptr, ptr %a, i64 %i.06
+ %0 = load ptr, ptr %arrayidx, align 8 ; %0 = a[i]
+ %bcmp = tail call i32 @bcmp(ptr %0, ptr %b, i64 4) ; compare 4 bytes at a[i] with 4 bytes at b
+ %tobool = icmp ne i32 %bcmp, 0
+ %add = zext i1 %tobool to i64
+ %spec.select = add i64 %sum.05, %add ; sum += (bcmp result != 0)
+ %inc = add nuw i64 %i.06, 1
+ %exitcond = icmp eq i64 %inc, %N
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
+; Rough C equivalent, read off the IR below. Pointee types and integer
+; signedness are guesses, since the IR only shows opaque `ptr` and `i64`:
+;
+;   uint64_t two_dimensional(void ***a, void *b, uint64_t N, uint64_t M, uint64_t K) {
+;     uint64_t sum = 0;
+;     if (N == 0 || M == 0)
+;       return 0;
+;     for (uint64_t i = 0; i != N; i++) {
+;       void **row = a[i];                // loaded once per outer iteration
+;       for (uint64_t j = 0; j != M; j++)
+;         sum += (bcmp(row[j], b, 4) != 0);
+;     }
+;     return sum;
+;   }
+;
+; %K is not used by this function. The second bcmp operand (%b) is the same
+; address on every iteration of both loops. In the expected output no call to
+; bcmp appears; the checks match two 32-bit loads and a cmp instead, and the
+; load from %b (ldr w13, [x1]) is located inside the inner loop .LBB1_4.
+define i64 @two_dimensional(ptr %a, ptr %b, i64 %N, i64 %M, i64 %K) {
+; CHECK-LABEL: two_dimensional:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: cbz x2, .LBB1_6
+; CHECK-NEXT: // %bb.1: // %entry
+; CHECK-NEXT: cbz x3, .LBB1_6
+; CHECK-NEXT: // %bb.2: // %for.cond1.preheader.preheader
+; CHECK-NEXT: mov x9, xzr
+; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: .LBB1_3: // %for.cond1.preheader
+; CHECK-NEXT: // =>This Loop Header: Depth=1
+; CHECK-NEXT: // Child Loop BB1_4 Depth 2
+; CHECK-NEXT: ldr x10, [x0, x9, lsl #3]
+; CHECK-NEXT: mov x11, x3
+; CHECK-NEXT: .LBB1_4: // %for.body4
+; CHECK-NEXT: // Parent Loop BB1_3 Depth=1
+; CHECK-NEXT: // => This Inner Loop Header: Depth=2
+; CHECK-NEXT: ldr x12, [x10], #8
+; CHECK-NEXT: ldr w13, [x1]
+; CHECK-NEXT: ldr w12, [x12]
+; CHECK-NEXT: cmp w12, w13
+; CHECK-NEXT: cinc x8, x8, ne
+; CHECK-NEXT: subs x11, x11, #1
+; CHECK-NEXT: b.ne .LBB1_4
+; CHECK-NEXT: // %bb.5: // %for.cond1.for.cond.cleanup3_crit_edge
+; CHECK-NEXT: // in Loop: Header=BB1_3 Depth=1
+; CHECK-NEXT: add x9, x9, #1
+; CHECK-NEXT: cmp x9, x2
+; CHECK-NEXT: b.ne .LBB1_3
+; CHECK-NEXT: .LBB1_6: // %for.cond.cleanup
+; CHECK-NEXT: mov x0, x8
+; CHECK-NEXT: ret
+entry:
+ %cmp17 = icmp eq i64 %N, 0
+ %cmp214 = icmp eq i64 %M, 0
+ %or.cond = or i1 %cmp17, %cmp214 ; N == 0 || M == 0: bypass both loops and return 0
+ br i1 %or.cond, label %for.cond.cleanup, label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %entry, %for.cond1.for.cond.cleanup3_crit_edge
+ %i.019 = phi i64 [ %inc7, %for.cond1.for.cond.cleanup3_crit_edge ], [ 0, %entry ]
+ %sum.018 = phi i64 [ %spec.select, %for.cond1.for.cond.cleanup3_crit_edge ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds ptr, ptr %a, i64 %i.019
+ %0 = load ptr, ptr %arrayidx, align 8 ; %0 = a[i], loaded before entering the inner loop
+ br label %for.body4
+
+for.body4: ; preds = %for.cond1.preheader, %for.body4
+ %j.016 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body4 ]
+ %sum.115 = phi i64 [ %sum.018, %for.cond1.preheader ], [ %spec.select, %for.body4 ]
+ %arrayidx5 = getelementptr inbounds ptr, ptr %0, i64 %j.016
+ %1 = load ptr, ptr %arrayidx5, align 8 ; %1 = a[i][j]
+ %bcmp = tail call i32 @bcmp(ptr %1, ptr %b, i64 4) ; compare 4 bytes at a[i][j] with 4 bytes at b
+ %tobool = icmp ne i32 %bcmp, 0
+ %add = zext i1 %tobool to i64
+ %spec.select = add i64 %sum.115, %add ; sum += (bcmp result != 0)
+ %inc = add nuw i64 %j.016, 1
+ %exitcond = icmp eq i64 %inc, %M
+ br i1 %exitcond, label %for.cond1.for.cond.cleanup3_crit_edge, label %for.body4
+
+for.cond1.for.cond.cleanup3_crit_edge: ; preds = %for.body4
+ %inc7 = add nuw i64 %i.019, 1
+ %exitcond22 = icmp eq i64 %inc7, %N
+ br i1 %exitcond22, label %for.cond.cleanup, label %for.cond1.preheader
+
+for.cond.cleanup: ; preds = %for.cond1.for.cond.cleanup3_crit_edge, %entry
+ %sum.0.lcssa = phi i64 [ 0, %entry ], [ %spec.select, %for.cond1.for.cond.cleanup3_crit_edge ]
+ ret i64 %sum.0.lcssa
+}
+
+define i64 @three_dimensional_middle(ptr %a, ptr %b, i64 %N, i64 %M, i64 %K) {
----------------
david-arm wrote:
Might be useful to have a comment explaining how this is different to `@three_dimensional`, since it's not immediately obvious. Or you could even have simple C equivalents in comments above the functions?
https://github.com/llvm/llvm-project/pull/70796
More information about the llvm-commits mailing list