[llvm] [llvm][CodeGen] Address the issue of multiple resource reservations in window scheduling (PR #100301)
Kai Yan via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 25 03:01:51 PDT 2024
================
@@ -0,0 +1,99 @@
+# REQUIRES: asserts
+# RUN: llc --march=hexagon %s -run-pass=pipeliner -debug-only=pipeliner \
+# RUN: -window-sched=force -filetype=null -verify-machineinstrs 2>&1 \
+# RUN: -window-search-ratio=100 -window-search-num=100 -window-diff-limit=1 \
+# RUN: | FileCheck %s
+
+# We want to verify that all three V6_vaddw instructions are emitted in the same cycle.
+# CHECK: Best window offset is {{[0-9]+}} and Best II is {{[0-9]+}}.
+# CHECK-NEXT: Cycle [[CycleNum:[0-9]+]] [[[StageNum:S.[0-9]+]]]: {{%[0-9]+}}:hvxvr = V6_vaddw {{%[0-9]+}}:hvxvr, {{%[0-9]+}}:hvxvr
+# CHECK-NEXT: Cycle [[CycleNum]] [[[StageNum]]]: {{%[0-9]+}}:hvxvr = V6_vaddw {{%[0-9]+}}:hvxvr, {{%[0-9]+}}:hvxvr
+# CHECK-NEXT: Cycle [[CycleNum]] [[[StageNum]]]: {{%[0-9]+}}:hvxvr = V6_vaddw {{%[0-9]+}}:hvxvr, {{%[0-9]+}}:hvxvr
+
+--- |
+ ; add_parallel(N, x, y): IR reference for the MIR function of the same name.
+ ; Returns immediately when N == 0. Otherwise each loop iteration loads one
+ ; <32 x i32> vector from x, combines it with three loop-invariant splat
+ ; vectors through a small tree of V6.vaddw calls, and stores the result to y.
+ ; The first three vaddw calls in the loop depend only on the load and on the
+ ; splats, not on each other.
+ define void @add_parallel(i32 %N, ptr noalias %x, ptr noalias %y) {
+ entry:
+ ; Skip the loop entirely for a zero trip count.
+ %isZeroLength = icmp eq i32 %N, 0
+ br i1 %isZeroLength, label %loop.exit, label %loop.preheader
+
+ loop.preheader: ; preds = %entry
+ ; Loop-invariant splats. The immediates are 0x3F000000, 0x3F800000 and
+ ; 0x40000000, i.e. the IEEE-754 single-precision bit patterns of 0.5, 1.0
+ ; and 2.0; they are passed to lvsplatw as plain i32 values.
+ %half_splat = tail call <32 x i32> @llvm.hexagon.V6.lvsplatw.128B(i32 1056964608)
+ %one_splat = tail call <32 x i32> @llvm.hexagon.V6.lvsplatw.128B(i32 1065353216)
+ %two_splat = tail call <32 x i32> @llvm.hexagon.V6.lvsplatw.128B(i32 1073741824)
+ br label %loop.body
+
+ loop.exit: ; preds = %loop.body, %entry
+ ret void
+
+ loop.body: ; preds = %loop.body, %loop.preheader
+ ; %lsr.iv1 walks the input (x), %lsr.iv walks the output (y); both advance
+ ; by 128 bytes per iteration (see %cgep1 / %cgep2 below).
+ %lsr.iv1 = phi ptr [ %cgep2, %loop.body ], [ %x, %loop.preheader ]
+ %lsr.iv = phi ptr [ %cgep1, %loop.body ], [ %y, %loop.preheader ]
+ %index = phi i32 [ 0, %loop.preheader ], [ %index.next, %loop.body ]
+ %vec_x1 = load <32 x i32>, ptr %lsr.iv1, align 128
+ ; Three mutually independent adds: each uses only %vec_x1 and one splat.
+ %vec_add_1 = tail call <32 x i32> @llvm.hexagon.V6.vaddw.128B(<32 x i32> %one_splat, <32 x i32> %vec_x1)
+ %vec_add_2 = tail call <32 x i32> @llvm.hexagon.V6.vaddw.128B(<32 x i32> %half_splat, <32 x i32> %vec_x1)
+ %vec_add_3 = tail call <32 x i32> @llvm.hexagon.V6.vaddw.128B(<32 x i32> %two_splat, <32 x i32> %vec_x1)
+ ; Second level: both adds consume %vec_add_1 plus one of the other results.
+ %vec_add_4 = tail call <32 x i32> @llvm.hexagon.V6.vaddw.128B(<32 x i32> %vec_add_1, <32 x i32> %vec_add_2)
+ %vec_add_5 = tail call <32 x i32> @llvm.hexagon.V6.vaddw.128B(<32 x i32> %vec_add_1, <32 x i32> %vec_add_3)
+ ; Final add joins the two second-level results; this is the value stored.
+ %vec_add_6 = tail call <32 x i32> @llvm.hexagon.V6.vaddw.128B(<32 x i32> %vec_add_5, <32 x i32> %vec_add_4)
+ store <32 x i32> %vec_add_6, ptr %lsr.iv, align 128
+ ; The index advances by 32 per iteration (one <32 x i32> vector); the loop
+ ; continues while the next index is unsigned-less-than N.
+ %index.next = add nuw i32 %index, 32
+ %continue = icmp ult i32 %index.next, %N
+ %cgep1 = getelementptr i8, ptr %lsr.iv, i32 128
+ %cgep2 = getelementptr i8, ptr %lsr.iv1, i32 128
+ br i1 %continue, label %loop.body, label %loop.exit
+ }
+
+ declare <32 x i32> @llvm.hexagon.V6.lvsplatw.128B(i32)
+ declare <32 x i32> @llvm.hexagon.V6.vaddw.128B(<32 x i32>, <32 x i32>)
+...
+---
+name: add_parallel
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ successors: %bb.2(0x30000000), %bb.1(0x50000000)
+ liveins: $r0, $r1, $r2
+
+ %11:intregs = COPY $r2
----------------
kaiyan96 wrote:
Updated.
https://github.com/llvm/llvm-project/pull/100301
More information about the llvm-commits mailing list