[llvm-bugs] [Bug 47591] New: incorrect SelectionDAG Code generation with FrameIndex load/stores

via llvm-bugs llvm-bugs at lists.llvm.org
Sun Sep 20 23:30:21 PDT 2020


https://bugs.llvm.org/show_bug.cgi?id=47591

            Bug ID: 47591
           Summary: incorrect SelectionDAG Code generation with FrameIndex
                    load/stores
           Product: new-bugs
           Version: trunk
          Hardware: PC
                OS: All
            Status: NEW
          Severity: enhancement
          Priority: P
         Component: new bugs
          Assignee: unassignedbugs at nondot.org
          Reporter: ys114321 at gmail.com
                CC: htmldeveloper at gmail.com, llvm-bugs at lists.llvm.org

The original issue is reported in iovisor/bpftrace:
   https://github.com/iovisor/bpftrace/issues/1305

I reproduced it in llvm10 and the latest trunk also has issues.

The following are the steps to reproduce:

1. I have produced an IR file (ir.ll) below to reproduce the issue:

[yhs at devvm1886.ftw0 ~/work/test/ir]$ cat ir.ll                                  
; ModuleID = 'bpftrace'
source_filename = "bpftrace"
target datalayout = "e-m:e-p:64:64-i64:64-n32:64-S128"
target triple = "bpf-pc-linux"

%printf_t = type { i64, i64 }

; Function Attrs: nounwind
declare i64 @llvm.bpf.pseudo(i64 %0, i64 %1) #0

define i64 @"kprobe:blk_update_request"(i8* %0) local_unnamed_addr section
"s_kprobe:blk_update_request_1" {
entry:
  %"struct kernfs_node.parent" = alloca i64, align 8
  %printf_args = alloca %printf_t, align 8
  %"struct cgroup.kn" = alloca i64, align 8
  %"struct cgroup_subsys_state.cgroup" = alloca i64, align 8
  %"struct blkcg_gq.blkcg" = alloca i64, align 8
  %"struct bio.bi_blkg" = alloca i64, align 8
  %"struct request.bio" = alloca i64, align 8
  %1 = getelementptr i8, i8* %0, i64 112
  %2 = bitcast i8* %1 to i64*
  %arg0 = load volatile i64, i64* %2, align 8
  %3 = add i64 %arg0, 56
  %4 = bitcast i64* %"struct request.bio" to i8*
  call void @llvm.lifetime.start.p0i8(i64 -1, i8* nonnull %4)
  %probe_read = call i64 inttoptr (i64 4 to i64 (i64*, i32, i64)*)(i64* nonnull
%"struct request.bio", i32 8, i64 %3)
  %5 = load i64, i64* %"struct request.bio", align 8
  call void @llvm.lifetime.end.p0i8(i64 -1, i8* nonnull %4)
  %6 = add i64 %5, 72
  %7 = bitcast i64* %"struct bio.bi_blkg" to i8*
  call void @llvm.lifetime.start.p0i8(i64 -1, i8* nonnull %7)
  %probe_read1 = call i64 inttoptr (i64 4 to i64 (i64*, i32, i64)*)(i64*
nonnull %"struct bio.bi_blkg", i32 8, i64 %6)
  %8 = load i64, i64* %"struct bio.bi_blkg", align 8
  call void @llvm.lifetime.end.p0i8(i64 -1, i8* nonnull %7)
  %9 = add i64 %8, 40
  %10 = bitcast i64* %"struct blkcg_gq.blkcg" to i8*
  call void @llvm.lifetime.start.p0i8(i64 -1, i8* nonnull %10)
  %probe_read2 = call i64 inttoptr (i64 4 to i64 (i64*, i32, i64)*)(i64*
nonnull %"struct blkcg_gq.blkcg", i32 8, i64 %9)
  %11 = load i64, i64* %"struct blkcg_gq.blkcg", align 8
  call void @llvm.lifetime.end.p0i8(i64 -1, i8* nonnull %10)
  %12 = bitcast i64* %"struct cgroup_subsys_state.cgroup" to i8*
  call void @llvm.lifetime.start.p0i8(i64 -1, i8* nonnull %12)
  %probe_read3 = call i64 inttoptr (i64 4 to i64 (i64*, i32, i64)*)(i64*
nonnull %"struct cgroup_subsys_state.cgroup", i32 8, i64 %11)
  %13 = load i64, i64* %"struct cgroup_subsys_state.cgroup", align 8
  call void @llvm.lifetime.end.p0i8(i64 -1, i8* nonnull %12)
  %14 = add i64 %13, 288
  %15 = bitcast i64* %"struct cgroup.kn" to i8*
  call void @llvm.lifetime.start.p0i8(i64 -1, i8* nonnull %15)
  %probe_read4 = call i64 inttoptr (i64 4 to i64 (i64*, i32, i64)*)(i64*
nonnull %"struct cgroup.kn", i32 8, i64 %14)
  %16 = load i64, i64* %"struct cgroup.kn", align 8
  call void @llvm.lifetime.end.p0i8(i64 -1, i8* nonnull %15)
  %17 = bitcast %printf_t* %printf_args to i8*
  call void @llvm.lifetime.start.p0i8(i64 -1, i8* nonnull %17)
  %18 = add i64 %16, 8
  %19 = bitcast i64* %"struct kernfs_node.parent" to i8*
  %20 = getelementptr inbounds %printf_t, %printf_t* %printf_args, i64 0, i32 0
  store i64 0, i64* %20, align 8
  call void @llvm.lifetime.start.p0i8(i64 -1, i8* nonnull %19)
  %probe_read5 = call i64 inttoptr (i64 4 to i64 (i64*, i32, i64)*)(i64*
nonnull %"struct kernfs_node.parent", i32 8, i64 %18)
  %21 = load i64, i64* %"struct kernfs_node.parent", align 8
  call void @llvm.lifetime.end.p0i8(i64 -1, i8* nonnull %19)
  %22 = getelementptr inbounds %printf_t, %printf_t* %printf_args, i64 0, i32 1
  store i64 %21, i64* %22, align 8
  %pseudo = call i64 @llvm.bpf.pseudo(i64 1, i64 1)
  %get_cpu_id = call i64 inttoptr (i64 8 to i64 ()*)()
  %perf_event_output = call i64 inttoptr (i64 25 to i64 (i8*, i64, i64,
%printf_t*, i64)*)(i8* %0, i64 %pseudo, i64 %get_cpu_id, %printf_t* nonnull
%printf_args, i64 16)
  call void @llvm.lifetime.end.p0i8(i64 -1, i8* nonnull %17)
  ret i64 0
}

; Function Attrs: argmemonly nounwind willreturn
declare void @llvm.lifetime.start.p0i8(i64 immarg %0, i8* nocapture %1) #1

; Function Attrs: argmemonly nounwind willreturn
declare void @llvm.lifetime.end.p0i8(i64 immarg %0, i8* nocapture %1) #1

attributes #0 = { nounwind }
attributes #1 = { argmemonly nounwind willreturn }

2. compile with
      llc -march=bpf -filetype=obj ir.ll
   and do an objdump,
      llvm-objdump -d ir.o

...
      29:       85 00 00 00 04 00 00 00 call 4
      30:       b7 01 00 00 00 00 00 00 r1 = 0
      31:       7b 1a e8 ff 00 00 00 00 *(u64 *)(r10 - 24) = r1
      32:       79 a3 e8 ff 00 00 00 00 r3 = *(u64 *)(r10 - 24)
      33:       07 03 00 00 08 00 00 00 r3 += 8
      34:       bf a1 00 00 00 00 00 00 r1 = r10
      35:       07 01 00 00 f8 ff ff ff r1 += -8
      36:       b7 02 00 00 08 00 00 00 r2 = 8
      37:       85 00 00 00 04 00 00 00 call 4
...

The corresponding IR (for the above assembly code) looks like:
  %15 = bitcast i64* %"struct cgroup.kn" to i8*
  call void @llvm.lifetime.start.p0i8(i64 -1, i8* nonnull %15)
  %probe_read4 = call i64 inttoptr (i64 4 to i64 (i64*, i32, i64)*)(i64*
nonnull %"struct cgroup.kn", i32 8, i64 %14)       
  %16 = load i64, i64* %"struct cgroup.kn", align 8
  call void @llvm.lifetime.end.p0i8(i64 -1, i8* nonnull %15)
  %17 = bitcast %printf_t* %printf_args to i8*
  call void @llvm.lifetime.start.p0i8(i64 -1, i8* nonnull %17)
  %18 = add i64 %16, 8
  %19 = bitcast i64* %"struct kernfs_node.parent" to i8*
  %20 = getelementptr inbounds %printf_t, %printf_t* %printf_args, i64 0, i32 0
  store i64 0, i64* %20, align 8
  call void @llvm.lifetime.start.p0i8(i64 -1, i8* nonnull %19)
  %probe_read5 = call i64 inttoptr (i64 4 to i64 (i64*, i32, i64)*)(i64*
nonnull %"struct kernfs_node.parent", i32 8, i64 %18)

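The load and store in the IR got reordered: in the IR the load (%16) precedes
the store of 0 through %20, but in the generated code the store (insn 31) is
emitted before the load (insn 32), and both access the same stack slot
(r10 - 24). I think they should not be reordered, since the load happens within
the lifetime started for %15 and the store happens within the lifetime started
for %17, and it is possible that both allocas share the same stack slot.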

The reordering happens in SelectionDAG. I did some debugging with
   llc -march=bpf -filetype=obj ir.ll -debug-only=dagcombine -debug-only=isel
and the issue may be related to
   BaseIndexOffset::computeAliasing

Especially the following code:

  // If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be
  // able to calculate their relative offset if at least one arises
  // from an alloca. However, these allocas cannot overlap and we
  // can infer there is no alias.
  if (auto *A = dyn_cast<FrameIndexSDNode>(BasePtr0.getBase()))
    if (auto *B = dyn_cast<FrameIndexSDNode>(BasePtr1.getBase())) {
      MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
      // If the base are the same frame index but the we couldn't find a
      // constant offset, (indices are different) be conservative.
      if (A != B && (!MFI.isFixedObjectIndex(A->getIndex()) ||
                     !MFI.isFixedObjectIndex(B->getIndex()))) {
        IsAlias = false;
        return true; 
      } 
    }

The above code seems to imply that different allocas (different FrameIndexes)
do not alias each other, so IsAlias is set to false. But maybe it is possible
that different FrameIndexes end up reusing the same stack slot during a later
optimization?

The IR is generated from bpftrace directly. I don't know whether the generated
IR is 100% correct or not. Maybe people familiar with SelectionDAG can help
check too.

-- 
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20200921/fba7627e/attachment.html>


More information about the llvm-bugs mailing list