[llvm] [MachinePipeliner] Add validation for missed dependencies (PR #135148)

via llvm-commits llvm-commits at lists.llvm.org
Tue Apr 15 16:10:55 PDT 2025


aankit-ca wrote:

@kasuga-fj @iajbar The degradations are much smaller this time, although we still see a few cases where unnecessary loop-carried dependences are being added. Consider the example below:

```
; Function Attrs: nofree norecurse nosync nounwind memory(argmem: readwrite)
; Reproducer: each loop iteration loads two <32 x i32> vectors through %in
; and stores them through %out, then advances both pointers by 256 bytes.
; %in is marked readonly; %out is marked noalias and writeonly.
define dso_local void @foo(ptr noundef readonly captures(none) %in, ptr noalias noundef writeonly captures(none) %out, i32 noundef %width) local_unnamed_addr {
entry:
  ; Enter the loop only when %width is positive (signed compare).
  %cmp7 = icmp sgt i32 %width, 0
  br i1 %cmp7, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  ; Running output pointer, running input pointer, and remaining count;
  ; they start at %out, %in and %width respectively.
  %optr.010 = phi ptr [ %incdec.ptr3, %for.body ], [ %out, %entry ]
  %iptr.09 = phi ptr [ %incdec.ptr1, %for.body ], [ %in, %entry ]
  %i.08 = phi i32 [ %sub, %for.body ], [ %width, %entry ]
  ; Load the vectors at %iptr.09 and %iptr.09 + 128; %incdec.ptr1
  ; (%iptr.09 + 256) is the input pointer for the next iteration.
  %incdec.ptr = getelementptr inbounds nuw i8, ptr %iptr.09, i32 128
  %0 = load <32 x i32>, ptr %iptr.09, align 128, !tbaa !5
  %incdec.ptr1 = getelementptr inbounds nuw i8, ptr %iptr.09, i32 256
  %1 = load <32 x i32>, ptr %incdec.ptr, align 128, !tbaa !5
  ; Store the loaded vectors at %optr.010 and %optr.010 + 128; %incdec.ptr3
  ; (%optr.010 + 256) is the output pointer for the next iteration.
  %incdec.ptr2 = getelementptr inbounds nuw i8, ptr %optr.010, i32 128
  store <32 x i32> %0, ptr %optr.010, align 128, !tbaa !5
  %incdec.ptr3 = getelementptr inbounds nuw i8, ptr %optr.010, i32 256
  store <32 x i32> %1, ptr %incdec.ptr2, align 128, !tbaa !5
  ; Decrease the count by 128 and keep looping while the pre-decrement
  ; value is greater than 128 (unsigned compare with the samesign flag).
  %sub = add nsw i32 %i.08, -128
  %cmp = icmp samesign ugt i32 %i.08, 128
  br i1 %cmp, label %for.body, label %for.end, !llvm.loop !8

for.end:                                          ; preds = %for.body, %entry
  ret void
}

!llvm.module.flags = !{!0, !1, !2, !3}

!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 8, !"PIC Level", i32 2}
!2 = !{i32 7, !"PIE Level", i32 2}
!3 = !{i32 7, !"frame-pointer", i32 2}
!5 = !{!6, !6, i64 0}
!6 = !{!"omnipotent char", !7, i64 0}
!7 = !{!"Simple C/C++ TBAA"}
!8 = distinct !{!8, !9, !10}
!9 = !{!"llvm.loop.mustprogress"}
!10 = !{!"llvm.loop.unroll.disable"}
```

In this case, we see unnecessary loop-carried dependences between the loads and the stores:

```
SU(1):   %3:intregs = PHI %7:intregs, %bb.1, %6:intregs, %bb.2
SU(2):   %12:hvxvr = V6_vL32b_ai %3:intregs, 0 :: (load (s1024) from %ir.iptr.09, !tbaa !4)
SU(0):   %2:intregs = PHI %8:intregs, %bb.1, %5:intregs, %bb.2
SU(3):   V6_vS32b_ai %2:intregs, 0, %12:hvxvr :: (store (s1024) into %ir.optr.010, !tbaa !4)
SU(4):   %13:hvxvr = V6_vL32b_ai %3:intregs, 128 :: (load (s1024) from %ir.cgep, !tbaa !4)
SU(5):   V6_vS32b_ai %2:intregs, 128, %13:hvxvr :: (store (s1024) into %ir.cgep3, !tbaa !4)
SU(6):   %5:intregs = A2_addi %2:intregs, 256
SU(7):   %6:intregs = A2_addi %3:intregs, 256

Loop Carried Edges:
  Loop carried edges from SU(2)
    Order
      SU(5)
  Loop carried edges from SU(3)
    Order
      SU(4)
  Loop carried edges from SU(4)
    Order
      SU(3)
  Loop carried edges from SU(5)
    Order
      SU(2)
```

Could you check this and handle such cases so that these unnecessary loop-carried dependences are eliminated?

https://github.com/llvm/llvm-project/pull/135148


More information about the llvm-commits mailing list