[llvm] [RISCV][VLOPT] Compute demanded VLs up front (PR #124530)

Alex Bradbury via llvm-commits llvm-commits at lists.llvm.org
Sun Feb 2 10:18:59 PST 2025


asb wrote:

I've directly committed https://github.com/llvm/llvm-project/commit/52c116218b61c088ac77f26c7b57347a5f54224d to fix the fact that `DemandedVLs` is never cleared. I'll leave details of my reproducer here, but because triggering the failed assertions depends on the precise sequence of memory reuse it may not be portable to other systems (which is why I haven't committed it as a test case).

After some reduction, the following input, invoked with `llc -enable-machine-outliner=never < reduced.ll`, will cause the assertion `isCandidate(UserMI)` in `getMinimumVLForUser` to fail. Seemingly, that option happens to change the memory allocation pattern such that a candidate MachineInstr* added to DemandedVLs while iterating over some earlier function aliases a MachineInstr* encountered while iterating over a later function (reallocations have presumably taken place).

I've directly committed what seems to be the most obvious fix.

```
; ModuleID = 'out.ll'
source_filename = "pr53645-2.c"
target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"
target triple = "riscv64-unknown-linux-gnu"

@u = global [2 x <8 x i16>] [<8 x i16> <i16 73, i16 -5, i16 0, i16 174, i16 921, i16 -1, i16 17, i16 178>, <8 x i16> <i16 1, i16 8173, i16 -1, i16 -64, i16 12, i16 29612, i16 128, i16 8912>]
@s = global [2 x <8 x i16>] [<8 x i16> <i16 73, i16 -9123, i16 32761, i16 8191, i16 16371, i16 1201, i16 12701, i16 9999>, <8 x i16> <i16 9903, i16 -1, i16 -7323, i16 0, i16 -7, i16 -323, i16 9124, i16 -9199>]

; Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable vscale_range(2,1024)
define void @ur65656565(ptr noundef writeonly captures(none) initializes((0, 16)) %x, ptr noundef readonly captures(none) %y) #0 {
entry:
  %0 = load <8 x i16>, ptr %y, align 16, !tbaa !9
  %rem = urem <8 x i16> %0, <i16 6, i16 5, i16 6, i16 5, i16 6, i16 5, i16 6, i16 5>
  store <8 x i16> %rem, ptr %x, align 16, !tbaa !9
  ret void
}

; Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable vscale_range(2,1024)
define void @uq77777777(ptr noundef writeonly captures(none) initializes((0, 16)) %x, ptr noundef readonly captures(none) %y) #0 {
entry:
  %0 = load <8 x i16>, ptr %y, align 16, !tbaa !9
  %div = udiv <8 x i16> %0, splat (i16 7)
  store <8 x i16> %div, ptr %x, align 16, !tbaa !9
  ret void
}

; Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable vscale_range(2,1024)
define void @sq77777777(ptr noundef writeonly captures(none) initializes((0, 16)) %x, ptr noundef readonly captures(none) %y) #0 {
entry:
  %0 = load <8 x i16>, ptr %y, align 16, !tbaa !9
  %div = sdiv <8 x i16> %0, splat (i16 7)
  store <8 x i16> %div, ptr %x, align 16, !tbaa !9
  ret void
}

; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
declare void @llvm.lifetime.start.p0(i64 immarg, ptr captures(none)) #1

; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
declare void @llvm.lifetime.end.p0(i64 immarg, ptr captures(none)) #1

attributes #0 = { mustprogress nofree noinline norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable vscale_range(2,1024) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic-rv64" "target-features"="+64bit,+a,+b,+c,+d,+f,+m,+relax,+supm,+v,+za64rs,+zaamo,+zalrsc,+zawrs,+zba,+zbb,+zbs,+zca,+zcb,+zcmop,+zfa,+zfhmin,+zic64b,+zicbom,+zicbop,+zicboz,+ziccamoa,+ziccif,+zicclsm,+ziccrse,+zicntr,+zicond,+zicsr,+zihintntl,+zihintpause,+zihpm,+zimop,+zkt,+zmmul,+zvbb,+zve32f,+zve32x,+zve64d,+zve64f,+zve64x,+zvfhmin,+zvkb,+zvkt,+zvl128b,+zvl32b,+zvl64b,-e,-experimental-sdext,-experimental-sdtrig,-experimental-smctr,-experimental-ssctr,-experimental-svukte,-experimental-xqcia,-experimental-xqciac,-experimental-xqcicli,-experimental-xqcicm,-experimental-xqcics,-experimental-xqcicsr,-experimental-xqciint,-experimental-xqcilo,-experimental-xqcilsm,-experimental-xqcisls,-experimental-zalasr,-experimental-zicfilp,-experimental-zicfiss,-experimental-zvbc32e,-experimental-zvkgs,-h,-sha,-shcounterenw,-shgatpa,-shtvala,-shvsatpa,-shvstvala,-shvstvecd,-smaia,-smcdeleg,-smcsrind,-smdbltrp,-smepmp,-smmpm,-smnpm,-smrnmi,-smstateen,-ssaia,-ssccfg,-ssccptr,-sscofpmf,-sscounterenw,-sscsrind,-ssdbltrp,-ssnpm,-sspm,-ssqosid,-ssstateen,-ssstrict,-sstc,-sstvala,-sstvecd,-ssu64xl,-svade,-svadu,-svbare,-svinval,-svnapot,-svpbmt,-svvptc,-xcvalu,-xcvbi,-xcvbitmanip,-xcvelw,-xcvmac,-xcvmem,-xcvsimd,-xmipscmove,-xmipslsp,-xsfcease,-xsfvcp,-xsfvfnrclipxfqf,-xsfvfwmaccqqq,-xsfvqmaccdod,-xsfvqmaccqoq,-xsifivecdiscarddlone,-xsifivecflushdlone,-xtheadba,-xtheadbb,-xtheadbs,-xtheadcmo,-xtheadcondmov,-xtheadfmemidx,-xtheadmac,-xtheadmemidx,-xtheadmempair,-xtheadsync,-xtheadvdot,-xventanacondops,-xwchc,-za128rs,-zabha,-zacas,-zama16b,-zbc,-zbkb,-zbkc,-zbkx,-zcd,-zce,-zcf,-zcmp,-zcmt,-zdinx,-zfbfmin,-zfh,-zfinx,-zhinx,-zhinxmin,-zifencei,-zk,-zkn,-zknd,-zkne,-zknh,-zkr,-zks,-zksed,-zksh,-ztso,-zvbc,-zvfbfmin,-zvfbfwma,-zvfh,-zvkg,-zvkn,-zvknc,-zvkned,-zvkng,-zvknha,-zvknhb,-zvks,-zvksc,-zvksed,-zvksg,-zvksh,-zvl1024b,-zvl16384b,-zvl2048b,-zvl256b,-zvl32768b,-zvl4096b,-zvl512b,-zvl65536b,-zvl8192b" }
attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }

!llvm.module.flags = !{!0, !1, !2, !4, !5, !6, !7}
!llvm.ident = !{!8}

!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 1, !"target-abi", !"lp64d"}
!2 = !{i32 6, !"riscv-isa", !3}
!3 = !{!"rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_b1p0_v1p0_zic64b1p0_zicbom1p0_zicbop1p0_zicboz1p0_ziccamoa1p0_ziccif1p0_zicclsm1p0_ziccrse1p0_zicntr2p0_zicond1p0_zicsr2p0_zihintntl1p0_zihintpause2p0_zihpm2p0_zimop1p0_zmmul1p0_za64rs1p0_zaamo1p0_zalrsc1p0_zawrs1p0_zfa1p0_zfhmin1p0_zca1p0_zcb1p0_zcmop1p0_zba1p0_zbb1p0_zbs1p0_zkt1p0_zvbb1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvfhmin1p0_zvkb1p0_zvkt1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0_supm1p0"}
!4 = !{i32 8, !"PIC Level", i32 2}
!5 = !{i32 7, !"PIE Level", i32 2}
!6 = !{i32 7, !"uwtable", i32 2}
!7 = !{i32 8, !"SmallDataLimit", i32 0}
!8 = !{!"clang version 21.0.0git"}
!9 = !{!10, !10, i64 0}
!10 = !{!"omnipotent char", !11, i64 0}
!11 = !{!"Simple C/C++ TBAA"}
```

https://github.com/llvm/llvm-project/pull/124530


More information about the llvm-commits mailing list