[PATCH] D122918: [RISCV][CodeGen] Support float-arith in Zfinx

Tue Apr 12 15:35:10 PDT 2022

hughperkins added a comment.

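Note: `llc` crashes during instruction selection ("Cannot select" on a store of an f32 value) when I compile some LLVM IR with `--march=riscv32 -mattr=+m,+zfinx`. Crash trace: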

  $ bin/llc ~/git/verigpu/examples/cpp_single_source/sum_floats/build_bash/sum_floats-device.ll -o ~/git/verigpu/examples/cpp_single_source/sum_floats/build_bash/sum_floats-device.s --march=riscv32 -mattr=+m,+zfinx
  'sm_35' is not a recognized processor for this target (ignoring processor)
  '+ptx32' is not a recognized feature for this target (ignoring feature)
  '+sm_35' is not a recognized feature for this target (ignoring feature)
  'sm_35' is not a recognized processor for this target (ignoring processor)
  'sm_35' is not a recognized processor for this target (ignoring processor)
  '+ptx32' is not a recognized feature for this target (ignoring feature)
  '+sm_35' is not a recognized feature for this target (ignoring feature)
  'sm_35' is not a recognized processor for this target (ignoring processor)
  LLVM ERROR: Cannot select: t8: ch = store<(store (s32) into %ir.26)> t7, t4, t6, undef:i32
    t4: f32,ch = load<(dereferenceable load (s32) from %ir.7)> t0, FrameIndex:i32<3>, undef:i32
      t1: i32 = FrameIndex<3>
      t3: i32 = undef
    t6: i32,ch = load<(dereferenceable load (s32) from %ir.6, align 8)> t0, FrameIndex:i32<2>, undef:i32
      t5: i32 = FrameIndex<2>
      t3: i32 = undef
    t3: i32 = undef
  In function: _Z10sum_floatsPfjS_
  PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.
  Stack dump:
  0.	Program arguments: bin/llc /Users/hp/git/verigpu/examples/cpp_single_source/sum_floats/build_bash/sum_floats-device.ll -o /Users/hp/git/verigpu/examples/cpp_single_source/sum_floats/build_bash/sum_floats-device.s --march=riscv32 -mattr=+m,+zfinx
  1.	Running pass 'Function Pass Manager' on module '/Users/hp/git/verigpu/examples/cpp_single_source/sum_floats/build_bash/sum_floats-device.ll'.
  2.	Running pass 'RISCV DAG->DAG Pattern Instruction Selection' on function '@_Z10sum_floatsPfjS_'
  Stack dump without symbol names (ensure you have llvm-symbolizer in your PATH or set the environment var `LLVM_SYMBOLIZER_PATH` to point to it):
  0  llc                      0x000000010391162d llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) + 61
  1  llc                      0x0000000103911bdb PrintStackTraceSignalHandler(void*) + 27
  2  llc                      0x000000010390f85b llvm::sys::RunSignalHandlers() + 139
  3  llc                      0x00000001039137d8 SignalHandler(int) + 232
  4  libsystem_platform.dylib 0x00007fff206e5d7d _sigtramp + 29
  5  libsystem_platform.dylib 000000000000000000 _sigtramp + 18446603339972059808
  6  libsystem_c.dylib        0x00007fff205f5406 abort + 125
  7  llc                      0x0000000103772e4a llvm::report_fatal_error(llvm::Twine const&, bool) + 394
  8  llc                      0x00000001035fa305 llvm::SelectionDAGISel::CannotYetSelect(llvm::SDNode*) + 725
  9  llc                      0x00000001035f5d72 llvm::SelectionDAGISel::SelectCodeCommon(llvm::SDNode*, unsigned char const*, unsigned int) + 26690
  10 llc                      0x0000000101176c6c llvm::RISCVDAGToDAGISel::SelectCode(llvm::SDNode*) + 44
  11 llc                      0x0000000101175e73 llvm::RISCVDAGToDAGISel::Select(llvm::SDNode*) + 31299
  12 llc                      0x00000001035e67fd llvm::SelectionDAGISel::DoInstructionSelection() + 1629
  13 llc                      0x00000001035e4f4d llvm::SelectionDAGISel::CodeGenAndEmitDAG() + 6205
  14 llc                      0x00000001035e356f llvm::SelectionDAGISel::SelectBasicBlock(llvm::ilist_iterator<llvm::ilist_detail::node_options<llvm::Instruction, true, false, void>, false, true>, llvm::ilist_iterator<llvm::ilist_detail::node_options<llvm::Instruction, true, false, void>, false, true>, bool&) + 399
  15 llc                      0x00000001035e1649 llvm::SelectionDAGISel::SelectAllBasicBlocks(llvm::Function const&) + 6329
  16 llc                      0x00000001035ddd2c llvm::SelectionDAGISel::runOnMachineFunction(llvm::MachineFunction&) + 2268
  17 llc                      0x0000000101178668 llvm::RISCVDAGToDAGISel::runOnMachineFunction(llvm::MachineFunction&) + 56
  18 llc                      0x0000000101e88dcd llvm::MachineFunctionPass::runOnFunction(llvm::Function&) + 541
  19 llc                      0x000000010280ddbc llvm::FPPassManager::runOnFunction(llvm::Function&) + 700
  20 llc                      0x0000000102815ce5 llvm::FPPassManager::runOnModule(llvm::Module&) + 117
  21 llc                      0x000000010280e784 (anonymous namespace)::MPPassManager::runOnModule(llvm::Module&) + 772
  22 llc                      0x000000010280e2a8 llvm::legacy::PassManagerImpl::run(llvm::Module&) + 296
  23 llc                      0x0000000102816081 llvm::legacy::PassManager::run(llvm::Module&) + 33
  24 llc                      0x00000001010b3a16 compileModule(char**, llvm::LLVMContext&) + 6742
  25 llc                      0x00000001010b177d main + 1501
  26 libdyld.dylib            0x00007fff206bbf3d start + 1
  Abort trap: 6

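Corresponding LLVM IR (generated by clang 14.0.0 for the `nvptx64-nvidia-cuda` triple, which is why `llc` prints the `sm_35`/`ptx32` warnings above):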

  ; ModuleID = '../sum_floats.cpp'
  source_filename = "../sum_floats.cpp"
  target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
  target triple = "nvptx64-nvidia-cuda"

  ; Function Attrs: convergent mustprogress noinline norecurse nounwind optnone
  define dso_local void @_Z10sum_floatsPfjS_(float* noundef %0, i32 noundef %1, float* noundef %2) #0 {
    %4 = alloca float*, align 8
    %5 = alloca i32, align 4
    %6 = alloca float*, align 8
    %7 = alloca float, align 4
    %8 = alloca i32, align 4
    store float* %0, float** %4, align 8
    store i32 %1, i32* %5, align 4
    store float* %2, float** %6, align 8
    store float 0.000000e+00, float* %7, align 4
    store i32 0, i32* %8, align 4
    br label %9

  9:                                                ; preds = %21, %3
    %10 = load i32, i32* %8, align 4
    %11 = load i32, i32* %5, align 4
    %12 = icmp ult i32 %10, %11
    br i1 %12, label %13, label %24

  13:                                               ; preds = %9
    %14 = load float*, float** %4, align 8
    %15 = load i32, i32* %8, align 4
    %16 = zext i32 %15 to i64
    %17 = getelementptr inbounds float, float* %14, i64 %16
    %18 = load float, float* %17, align 4
    %19 = load float, float* %7, align 4
    %20 = fadd contract float %19, %18
    store float %20, float* %7, align 4
    br label %21

  21:                                               ; preds = %13
    %22 = load i32, i32* %8, align 4
    %23 = add i32 %22, 1
    store i32 %23, i32* %8, align 4
    br label %9, !llvm.loop !7

  24:                                               ; preds = %9
    %25 = load float, float* %7, align 4
    %26 = load float*, float** %6, align 8
    store float %25, float* %26, align 4
    ret void
  }

  attributes #0 = { convergent mustprogress noinline norecurse nounwind optnone "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_35" "target-features"="+ptx32,+sm_35" }

  !nvvm.annotations = !{!0}
  !llvm.module.flags = !{!1, !2, !3, !4, !5}
  !llvm.ident = !{!6}

  !0 = !{void (float*, i32, float*)* @_Z10sum_floatsPfjS_, !"kernel", i32 1}
  !1 = !{i32 1, !"wchar_size", i32 4}
  !2 = !{i32 4, !"nvvm-reflect-ftz", i32 0}
  !3 = !{i32 7, !"PIC Level", i32 2}
  !4 = !{i32 7, !"PIE Level", i32 2}
  !5 = !{i32 7, !"frame-pointer", i32 2}
  !6 = !{!"clang version 14.0.0 (https://github.com/tru/llvm-release-build fc075d7c96fe7c992dde351695a5d25fe084794a)"}
  !7 = distinct !{!7, !8}
  !8 = !{!"llvm.loop.mustprogress"}

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D122918/new/

https://reviews.llvm.org/D122918