[llvm] [MachinePipeliner] Fix instruction order with physical register (PR #99264)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 16 19:06:06 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: Ryotaro KASUGA (kasuga-fj)
<details>
<summary>Changes</summary>
dependencies in same cycle
Dependency checks were insufficient when reordering instructions with physical register dependencies (i.e. Anti/Output dependencies). This could result in generating incorrect code.
The following output was dumped when applying MachinePipeliner to the attached test code:
```
cycle 23 (0) (180) %264:gpr64 = nsw SUBSXri %9:gpr64sp, 28, 0, implicit-def $nzcv
cycle 23 (0) (171) %258:ppr = CMPHS_PPzZZ_S %75:ppr_3b, %100:zpr, %257:zpr, implicit-def dead $nzcv
```
In this case, SU(171) and SU(180) are emitted in reversed order, so SU(171) redefines `$nzcv` after SU(180) has set it. This affects the instructions following SU(180) that use the `$nzcv` value it defines.
---
Patch is 31.35 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/99264.diff
2 Files Affected:
- (modified) llvm/lib/CodeGen/MachinePipeliner.cpp (+7-4)
- (added) llvm/test/CodeGen/AArch64/sms-order-physreg-deps.mir (+452)
``````````diff
diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp
index 497e282bb9768..e2d099e64455d 100644
--- a/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -3041,9 +3041,10 @@ void SMSchedule::orderDependence(const SwingSchedulerDAG *SSD, SUnit *SU,
MoveUse = Pos;
}
// We did not handle HW dependences in previous for loop,
- // and we normally set Latency = 0 for Anti deps,
- // so may have nodes in same cycle with Anti denpendent on HW regs.
- else if (S.getKind() == SDep::Anti && stageScheduled(*I) == StageInst1) {
+ // and we normally set Latency = 0 for Anti/Output deps,
+ // so may have nodes in same cycle with Anti/Output dependent on HW regs.
+ else if ((S.getKind() == SDep::Anti || S.getKind() == SDep::Output) &&
+ stageScheduled(*I) == StageInst1) {
OrderBeforeUse = true;
if ((MoveUse == 0) || (Pos < MoveUse))
MoveUse = Pos;
@@ -3052,7 +3053,9 @@ void SMSchedule::orderDependence(const SwingSchedulerDAG *SSD, SUnit *SU,
for (auto &P : SU->Preds) {
if (P.getSUnit() != *I)
continue;
- if (P.getKind() == SDep::Order && stageScheduled(*I) == StageInst1) {
+ if ((P.getKind() == SDep::Order || P.getKind() == SDep::Anti ||
+ P.getKind() == SDep::Output) &&
+ stageScheduled(*I) == StageInst1) {
OrderAfterDef = true;
MoveDef = Pos;
}
diff --git a/llvm/test/CodeGen/AArch64/sms-order-physreg-deps.mir b/llvm/test/CodeGen/AArch64/sms-order-physreg-deps.mir
new file mode 100644
index 0000000000000..7049d9ca043f7
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sms-order-physreg-deps.mir
@@ -0,0 +1,452 @@
+# RUN: llc -o - %s -mcpu=a64fx -aarch64-enable-pipeliner -pipeliner-max-mii=100 -pipeliner-enable-copytophi=0 -debug-only=pipeliner -run-pass=pipeliner -treat-scalable-fixed-error-as-warning 2>&1 | FileCheck %s
+
+# REQUIRES: asserts
+
+# Verify that the order of the instructions is correct if they are scheduled in
+# the same cycle and they have physical register dependencies.
+
+# CHECK: Schedule Found? 1
+# CHECK: cycle {{[0-9]+}} (0) {{.*}} SUBS{{.*}} implicit-def $nzcv
+# CHECK-NOT: cycle {{[0-9]+}} (0) {{.*}} implicit-def {{.*}} $nzcv
+
+--- |
+ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
+
+ declare void @free(ptr allocptr nocapture noundef) local_unnamed_addr #0
+
+ define dso_local noundef i32 @main(i32 noundef %argc, ptr nocapture noundef readnone %argv) local_unnamed_addr #1 {
+ entry:
+ %ret.i.i55 = alloca ptr, align 8
+ %ret.i.i = alloca ptr, align 8
+ %0 = load ptr, ptr %ret.i.i, align 8
+ br label %vector.ph
+
+ vector.ph: ; preds = %for.inc20.i, %entry
+ %lsr.iv1 = phi i64 [ %lsr.iv.next2, %for.inc20.i ], [ 0, %entry ]
+ %indvars.iv45.i = phi i64 [ 0, %entry ], [ %indvars.iv.next46.i, %for.inc20.i ]
+ %broadcast.splatinsert = insertelement <vscale x 4 x i64> poison, i64 %indvars.iv45.i, i64 0
+ %broadcast.splat = shufflevector <vscale x 4 x i64> %broadcast.splatinsert, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+ br label %vector.body
+
+ vector.body: ; preds = %vector.body, %vector.ph
+ %lsr.iv3 = phi i64 [ %lsr.iv.next4, %vector.body ], [ %lsr.iv1, %vector.ph ]
+ %lsr.iv = phi i64 [ %lsr.iv.next, %vector.body ], [ 2800, %vector.ph ]
+ %vec.ind = phi <vscale x 4 x i64> [ zeroinitializer, %vector.ph ], [ %vec.ind.next.6, %vector.body ]
+ %1 = mul nuw nsw <vscale x 4 x i64> %vec.ind, %broadcast.splat
+ %2 = trunc <vscale x 4 x i64> %1 to <vscale x 4 x i32>
+ %3 = urem <vscale x 4 x i32> %2, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 7, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %4 = add nuw nsw <vscale x 4 x i32> %3, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %5 = shl nuw nsw i64 %lsr.iv3, 2
+ %scevgep16 = getelementptr i8, ptr %0, i64 %5
+ %6 = add nuw nsw <vscale x 4 x i64> %vec.ind, %broadcast.splat
+ %7 = trunc <vscale x 4 x i64> %6 to <vscale x 4 x i32>
+ %8 = urem <vscale x 4 x i32> %7, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 13, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %9 = icmp eq <vscale x 4 x i32> %8, zeroinitializer
+ %10 = urem <vscale x 4 x i32> %7, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 7, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %11 = icmp eq <vscale x 4 x i32> %10, zeroinitializer
+ %12 = or <vscale x 4 x i1> %9, %11
+ %13 = urem <vscale x 4 x i32> %7, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 11, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %14 = icmp eq <vscale x 4 x i32> %13, zeroinitializer
+ %15 = or <vscale x 4 x i1> %14, %12
+ %16 = select <vscale x 4 x i1> %15, <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 999, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> %4
+ store <vscale x 4 x i32> %16, ptr %scevgep16, align 4
+ %vec.ind.next = add <vscale x 4 x i64> %vec.ind, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 4, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
+ %17 = mul nuw nsw <vscale x 4 x i64> %vec.ind.next, %broadcast.splat
+ %18 = trunc <vscale x 4 x i64> %17 to <vscale x 4 x i32>
+ %19 = urem <vscale x 4 x i32> %18, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 7, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %20 = add nuw nsw <vscale x 4 x i32> %19, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %21 = shl nuw nsw i64 %lsr.iv3, 2
+ %scevgep14 = getelementptr i8, ptr %0, i64 %21
+ %scevgep15 = getelementptr i8, ptr %scevgep14, i64 16
+ %22 = add nuw nsw <vscale x 4 x i64> %vec.ind.next, %broadcast.splat
+ %23 = trunc <vscale x 4 x i64> %22 to <vscale x 4 x i32>
+ %24 = urem <vscale x 4 x i32> %23, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 13, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %25 = icmp eq <vscale x 4 x i32> %24, zeroinitializer
+ %26 = urem <vscale x 4 x i32> %23, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 7, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %27 = icmp eq <vscale x 4 x i32> %26, zeroinitializer
+ %28 = or <vscale x 4 x i1> %25, %27
+ %29 = urem <vscale x 4 x i32> %23, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 11, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %30 = icmp eq <vscale x 4 x i32> %29, zeroinitializer
+ %31 = or <vscale x 4 x i1> %30, %28
+ %32 = select <vscale x 4 x i1> %31, <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 999, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> %20
+ store <vscale x 4 x i32> %32, ptr %scevgep15, align 4
+ %vec.ind.next.1 = add <vscale x 4 x i64> %vec.ind, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 8, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
+ %33 = mul nuw nsw <vscale x 4 x i64> %vec.ind.next.1, %broadcast.splat
+ %34 = trunc <vscale x 4 x i64> %33 to <vscale x 4 x i32>
+ %35 = urem <vscale x 4 x i32> %34, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 7, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %36 = add nuw nsw <vscale x 4 x i32> %35, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %37 = shl nuw nsw i64 %lsr.iv3, 2
+ %scevgep12 = getelementptr i8, ptr %0, i64 %37
+ %scevgep13 = getelementptr i8, ptr %scevgep12, i64 32
+ %38 = add nuw nsw <vscale x 4 x i64> %vec.ind.next.1, %broadcast.splat
+ %39 = trunc <vscale x 4 x i64> %38 to <vscale x 4 x i32>
+ %40 = urem <vscale x 4 x i32> %39, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 13, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %41 = icmp eq <vscale x 4 x i32> %40, zeroinitializer
+ %42 = urem <vscale x 4 x i32> %39, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 7, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %43 = icmp eq <vscale x 4 x i32> %42, zeroinitializer
+ %44 = or <vscale x 4 x i1> %41, %43
+ %45 = urem <vscale x 4 x i32> %39, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 11, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %46 = icmp eq <vscale x 4 x i32> %45, zeroinitializer
+ %47 = or <vscale x 4 x i1> %46, %44
+ %48 = select <vscale x 4 x i1> %47, <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 999, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> %36
+ store <vscale x 4 x i32> %48, ptr %scevgep13, align 4
+ %vec.ind.next.2 = add <vscale x 4 x i64> %vec.ind, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 12, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
+ %49 = mul nuw nsw <vscale x 4 x i64> %vec.ind.next.2, %broadcast.splat
+ %50 = trunc <vscale x 4 x i64> %49 to <vscale x 4 x i32>
+ %51 = urem <vscale x 4 x i32> %50, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 7, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %52 = add nuw nsw <vscale x 4 x i32> %51, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %53 = shl nuw nsw i64 %lsr.iv3, 2
+ %scevgep10 = getelementptr i8, ptr %0, i64 %53
+ %scevgep11 = getelementptr i8, ptr %scevgep10, i64 48
+ %54 = add nuw nsw <vscale x 4 x i64> %vec.ind.next.2, %broadcast.splat
+ %55 = trunc <vscale x 4 x i64> %54 to <vscale x 4 x i32>
+ %56 = urem <vscale x 4 x i32> %55, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 13, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %57 = icmp eq <vscale x 4 x i32> %56, zeroinitializer
+ %58 = urem <vscale x 4 x i32> %55, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 7, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %59 = icmp eq <vscale x 4 x i32> %58, zeroinitializer
+ %60 = or <vscale x 4 x i1> %57, %59
+ %61 = urem <vscale x 4 x i32> %55, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 11, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %62 = icmp eq <vscale x 4 x i32> %61, zeroinitializer
+ %63 = or <vscale x 4 x i1> %62, %60
+ %64 = select <vscale x 4 x i1> %63, <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 999, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> %52
+ store <vscale x 4 x i32> %64, ptr %scevgep11, align 4
+ %vec.ind.next.3 = add <vscale x 4 x i64> %vec.ind, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 16, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
+ %65 = mul nuw nsw <vscale x 4 x i64> %vec.ind.next.3, %broadcast.splat
+ %66 = trunc <vscale x 4 x i64> %65 to <vscale x 4 x i32>
+ %67 = urem <vscale x 4 x i32> %66, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 7, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %68 = add nuw nsw <vscale x 4 x i32> %67, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %69 = shl nuw nsw i64 %lsr.iv3, 2
+ %scevgep8 = getelementptr i8, ptr %0, i64 %69
+ %scevgep9 = getelementptr i8, ptr %scevgep8, i64 64
+ %70 = add nuw nsw <vscale x 4 x i64> %vec.ind.next.3, %broadcast.splat
+ %71 = trunc <vscale x 4 x i64> %70 to <vscale x 4 x i32>
+ %72 = urem <vscale x 4 x i32> %71, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 13, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %73 = icmp eq <vscale x 4 x i32> %72, zeroinitializer
+ %74 = urem <vscale x 4 x i32> %71, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 7, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %75 = icmp eq <vscale x 4 x i32> %74, zeroinitializer
+ %76 = or <vscale x 4 x i1> %73, %75
+ %77 = urem <vscale x 4 x i32> %71, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 11, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %78 = icmp eq <vscale x 4 x i32> %77, zeroinitializer
+ %79 = or <vscale x 4 x i1> %78, %76
+ %80 = select <vscale x 4 x i1> %79, <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 999, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> %68
+ store <vscale x 4 x i32> %80, ptr %scevgep9, align 4
+ %vec.ind.next.4 = add <vscale x 4 x i64> %vec.ind, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 20, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
+ %81 = mul nuw nsw <vscale x 4 x i64> %vec.ind.next.4, %broadcast.splat
+ %82 = trunc <vscale x 4 x i64> %81 to <vscale x 4 x i32>
+ %83 = urem <vscale x 4 x i32> %82, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 7, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %84 = add nuw nsw <vscale x 4 x i32> %83, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %85 = shl nuw nsw i64 %lsr.iv3, 2
+ %scevgep6 = getelementptr i8, ptr %0, i64 %85
+ %scevgep7 = getelementptr i8, ptr %scevgep6, i64 80
+ %86 = add nuw nsw <vscale x 4 x i64> %vec.ind.next.4, %broadcast.splat
+ %87 = trunc <vscale x 4 x i64> %86 to <vscale x 4 x i32>
+ %88 = urem <vscale x 4 x i32> %87, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 13, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %89 = icmp eq <vscale x 4 x i32> %88, zeroinitializer
+ %90 = urem <vscale x 4 x i32> %87, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 7, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %91 = icmp eq <vscale x 4 x i32> %90, zeroinitializer
+ %92 = or <vscale x 4 x i1> %89, %91
+ %93 = urem <vscale x 4 x i32> %87, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 11, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %94 = icmp eq <vscale x 4 x i32> %93, zeroinitializer
+ %95 = or <vscale x 4 x i1> %94, %92
+ %96 = select <vscale x 4 x i1> %95, <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 999, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> %84
+ store <vscale x 4 x i32> %96, ptr %scevgep7, align 4
+ %vec.ind.next.5 = add <vscale x 4 x i64> %vec.ind, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 24, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
+ %97 = mul nuw nsw <vscale x 4 x i64> %vec.ind.next.5, %broadcast.splat
+ %98 = trunc <vscale x 4 x i64> %97 to <vscale x 4 x i32>
+ %99 = urem <vscale x 4 x i32> %98, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 7, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %100 = add nuw nsw <vscale x 4 x i32> %99, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %101 = shl nuw nsw i64 %lsr.iv3, 2
+ %scevgep = getelementptr i8, ptr %0, i64 %101
+ %scevgep5 = getelementptr i8, ptr %scevgep, i64 96
+ %102 = add nuw nsw <vscale x 4 x i64> %vec.ind.next.5, %broadcast.splat
+ %103 = trunc <vscale x 4 x i64> %102 to <vscale x 4 x i32>
+ %104 = urem <vscale x 4 x i32> %103, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 13, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %105 = icmp eq <vscale x 4 x i32> %104, zeroinitializer
+ %106 = urem <vscale x 4 x i32> %103, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 7, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %107 = icmp eq <vscale x 4 x i32> %106, zeroinitializer
+ %108 = or <vscale x 4 x i1> %105, %107
+ %109 = urem <vscale x 4 x i32> %103, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 11, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %110 = icmp eq <vscale x 4 x i32> %109, zeroinitializer
+ %111 = or <vscale x 4 x i1> %110, %108
+ %112 = select <vscale x 4 x i1> %111, <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 999, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> %100
+ store <vscale x 4 x i32> %112, ptr %scevgep5, align 4
+ %vec.ind.next.6 = add <vscale x 4 x i64> %vec.ind, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 28, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
+ %lsr.iv.next = add nsw i64 %lsr.iv, -28
+ %lsr.iv.next4 = add nuw nsw i64 %lsr.iv3, 28
+ %113 = icmp eq i64 %lsr.iv.next, 0
+ br i1 %113, label %for.inc20.i, label %vector.body
+
+ for.inc20.i: ; preds = %vector.body
+ %indvars.iv.next46.i = add nuw nsw i64 %indvars.iv45.i, 1
+ %lsr.iv.next2 = add nuw nsw i64 %lsr.iv1, 2800
+ %exitcond48.not.i = icmp eq i64 %indvars.iv.next46.i, 2800
+ br i1 %exitcond48.not.i, label %init_array.exit, label %vector.ph
+
+ init_array.exit: ; preds = %for.inc20.i
+ call void @free(ptr noundef nonnull %0)
+ ret i32 0
+ }
+
+ attributes #0 = { mustprogress nounwind willreturn allockind("free") memory(argmem: readwrite, inaccessiblemem: readwrite) "alloc-family"="malloc" "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="a64fx" "target-features"="+aes,+complxnum,+crc,+fp-armv8,+fullfp16,+lse,+neon,+outline-atomics,+perfmon,+ras,+rdm,+sha2,+sve,+v8.1a,+v8.2a,+v8a,-fmv" }
+ attributes #1 = { nounwind uwtable vscale_range(1,1) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="a64fx" "target-features"="+aes,+complxnum,+crc,+fp-armv8,+fullfp16,+lse,+neon,+outline-atomics,+perfmon,+ras,+rdm,+sha2,+sve,+v8.1a,+v8.2a,+v8a,-fmv" }
+
+...
+---
+name: main
+tracksRegLiveness: true
+stack:
+ - { id: 0, name: ret.i.i55, size: 8, alignment: 8, local-offset: -8 }
+ - { id: 1, name: ret.i.i, size: 8, alignment: 8, local-offset: -16 }
+machineFunctionInfo: {}
+body: |
+ bb.0.entry:
+ %18:gpr64all = COPY $xzr
+ %17:gpr64all = COPY %18
+ %19:gp...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/99264
More information about the llvm-commits
mailing list