[PATCH] D138899: [DAGCombiner] handle more store value forwarding

Mon Feb 13 19:04:54 PST 2023

aeubanks added a comment.

I believe this is causing a miscompile on this <https://crsrc.org/c/chrome/browser/dom_distiller/dom_distiller_service_factory.cc;drc=8ce391bed5ee336e59ccd87b8869760c30e2aad7;l=27> code.

  $ cat /tmp/a.ll
  source_filename = "../../chrome/browser/dom_distiller/dom_distiller_service_factory.cc"
  target datalayout = "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-f64:32:64-f80:32-n8:16:32-S128"
  target triple = "i686-unknown-linux-android24"

  %0 = type { %1 }
  %1 = type { %2 }
  %2 = type { ptr }
  %3 = type { %4 }
  %4 = type { %5 }
  %5 = type { ptr }
  %6 = type { %7 }
  %7 = type { %8 }
  %8 = type { ptr }
  %9 = type { %10 }
  %10 = type { %11 }
  %11 = type { ptr }

  @g = external hidden unnamed_addr constant { [5 x ptr], [9 x ptr] }, align 4
  @g2 = external hidden unnamed_addr constant { [5 x ptr] }, align 4

  ; Function Attrs: minsize nounwind null_pointer_is_valid optsize uwtable
  define hidden void @f(ptr noundef align 4 dereferenceable_or_null(48) %0, ptr noundef byval(%0) align 4 %1, ptr noundef byval(%3) align 4 %2, ptr noundef byval(%6) align 4 %3, ptr noundef byval(%9) align 4 %4) unnamed_addr #0 align 2 {
    %6 = alloca %0, align 4
    %7 = alloca %3, align 4
    %8 = alloca %6, align 4
    %9 = alloca %9, align 4
    store ptr getelementptr inbounds ({ [5 x ptr] }, ptr @g2, i32 0, inrange i32 0, i32 2), ptr %0, align 4
    %10 = getelementptr inbounds i8, ptr %0, i32 4
    %11 = load ptr, ptr %1, align 4
    store ptr null, ptr %1, align 4
    store ptr %11, ptr %6, align 4
    %12 = load ptr, ptr %2, align 4
    store ptr null, ptr %2, align 4
    store ptr %12, ptr %7, align 4
    %13 = load ptr, ptr %3, align 4
    store ptr null, ptr %3, align 4
    store ptr %13, ptr %8, align 4
    %14 = load ptr, ptr %4, align 4
    store ptr null, ptr %4, align 4
    store ptr %14, ptr %9, align 4
    tail call void @h1(ptr noundef align 4 dereferenceable_or_null(44) %10, ptr noundef nonnull byval(%0) align 4 %6, ptr noundef nonnull byval(%3) align 4 %7, ptr noundef nonnull byval(%6) align 4 %8, ptr noundef nonnull byval(%9) align 4 %9) #2
    store ptr getelementptr inbounds ({ [5 x ptr], [9 x ptr] }, ptr @g, i32 0, inrange i32 0, i32 2), ptr %0, align 4
    store ptr getelementptr inbounds ({ [5 x ptr], [9 x ptr] }, ptr @g, i32 0, inrange i32 1, i32 2), ptr %10, align 4
    call void @h2(ptr noundef align 4 dereferenceable_or_null(4) %4) #2
    call void @h3(ptr noundef align 4 dereferenceable_or_null(4) %3) #2
    call void @h4(ptr noundef align 4 dereferenceable_or_null(4) %2) #2
    call void @h5(ptr noundef align 4 dereferenceable_or_null(4) %1) #2
    ret void
  }

  ; Function Attrs: minsize null_pointer_is_valid optsize
  declare void @h1(ptr noundef align 4 dereferenceable_or_null(44), ptr noundef byval(%0) align 4, ptr noundef byval(%3) align 4, ptr noundef byval(%6) align 4, ptr noundef byval(%9) align 4) unnamed_addr #1

  ; Function Attrs: minsize nounwind null_pointer_is_valid optsize uwtable
  declare hidden void @h2(ptr noundef align 4 dereferenceable_or_null(4)) unnamed_addr #0 align 2

  ; Function Attrs: minsize nounwind null_pointer_is_valid optsize uwtable
  declare hidden void @h3(ptr noundef align 4 dereferenceable_or_null(4)) unnamed_addr #0 align 2

  ; Function Attrs: minsize nounwind null_pointer_is_valid optsize uwtable
  declare hidden void @h4(ptr noundef align 4 dereferenceable_or_null(4)) unnamed_addr #0 align 2

  ; Function Attrs: minsize nounwind null_pointer_is_valid optsize uwtable
  declare hidden void @h5(ptr noundef align 4 dereferenceable_or_null(4)) unnamed_addr #0 align 2

  attributes #0 = { minsize nounwind null_pointer_is_valid optsize uwtable "frame-pointer"="non-leaf" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="i686" "target-features"="+cx8,+mmx,+sse,+sse2,+sse3,+ssse3,+x87" "tune-cpu"="generic" }
  attributes #1 = { minsize null_pointer_is_valid optsize "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="i686" "target-features"="+cx8,+mmx,+sse,+sse2,+sse3,+ssse3,+x87" "tune-cpu"="generic" }
  attributes #2 = { minsize nounwind optsize }

  !llvm.linker.options = !{}
  !llvm.module.flags = !{!0, !1, !2, !3, !4, !5, !6}

  !0 = !{i32 1, !"NumRegisterParameters", i32 0}
  !1 = !{i32 7, !"Dwarf Version", i32 4}
  !2 = !{i32 2, !"Debug Info Version", i32 3}
  !3 = !{i32 1, !"wchar_size", i32 4}
  !4 = !{i32 8, !"PIC Level", i32 2}
  !5 = !{i32 7, !"uwtable", i32 2}
  !6 = !{i32 7, !"frame-pointer", i32 1}

  $ llc /tmp/a.ll # one commit before this change
  ...
  	movl	8(%ebp), %ebx
  	movl	$g2+8, (%ebx)
  	leal	12(%ebp), %ecx
  	movl	(%ecx), %eax
  	movl	%ecx, %edx
  	leal	16(%ebp), %ecx
  	movl	(%ecx), %ecx
  	movl	%eax, -40(%ebp)
  	leal	20(%ebp), %edi
  	movl	%ecx, -32(%ebp)
  	movl	(%edi), %eax
  	movl	%eax, -24(%ebp)
  	leal	24(%ebp), %esi
  	movl	(%esi), %eax
  	movl	%eax, -16(%ebp)
  	xorps	%xmm0, %xmm0
  	movups	%xmm0, (%edx) ; sets 12(%ebp), 16(%ebp), 20(%ebp), 24(%ebp) to 0
  	leal	4(%ebx), %eax
  	movl	-16(%ebp), %ecx
  	movl	%ecx, 16(%esp) ; stores -16(%ebp), which is also 24(%ebp), to 16(%esp)
  	movl	-24(%ebp), %ecx
  	movl	%ecx, 12(%esp)
  	movl	-32(%ebp), %ecx
  	movl	%ecx, 8(%esp)
  	movl	-40(%ebp), %ecx
  	movl	%ecx, 4(%esp)
  	movl	%eax, (%esp)
  	calll	h1 at PLT
  ...
  $ llc /tmp/a.ll # at this commit
  ...
  	movl	8(%ebp), %ebx
  	movl	$g2+8, (%ebx)
  	leal	12(%ebp), %ecx
  	movl	(%ecx), %eax
  	movl	%ecx, %edx
  	xorps	%xmm0, %xmm0
  	leal	16(%ebp), %ecx
  	movl	(%ecx), %ecx
  	movl	%eax, -32(%ebp)
  	leal	20(%ebp), %edi
  	movl	%ecx, -24(%ebp)
  	movl	(%edi), %eax
  	movl	%eax, -16(%ebp)
  	leal	24(%ebp), %esi
  	movups	%xmm0, (%edx) ; sets 12(%ebp), 16(%ebp), 20(%ebp), 24(%ebp) to 0
  	movl	(%esi), %eax ; loads 24(%ebp) to %eax, which is now 0
  	movl	%eax, -40(%ebp)
  	leal	4(%ebx), %ecx
  	movl	%eax, 16(%esp) ; stores 0 to 16(%esp) which is wrong!!!
  	movl	-16(%ebp), %eax
  	movl	%eax, 12(%esp)
  	movl	-24(%ebp), %eax
  	movl	%eax, 8(%esp)
  	movl	-32(%ebp), %eax
  	movl	%eax, 4(%esp)
  	movl	%ecx, (%esp)
  	calll	h1 at PLT
  ...

see my added comments in the llc output

Going to revert this commit (and the follow up commit)

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D138899/new/

https://reviews.llvm.org/D138899