[PATCH] D108782: [MergeICmps] Ignore clobbering instructions before the loads

Fri Sep 17 14:30:24 PDT 2021

nikic added a comment.

Thanks for the report! This seems to have exposed a pre-existing problem. Here's a variant that fails also prior to this patch (note the commented init calls):

  target triple = "x86_64-unknown-linux-gnu"

  %"struct.a::c" = type { i32, i32*, i8* }

  define i1 @test() {
  entry:
    %h = alloca %"struct.a::c", align 8
    %i = alloca %"struct.a::c", align 8
    ;call void @init(%"struct.a::c"* %h)
    ;call void @init(%"struct.a::c"* %i)
    %e = getelementptr inbounds %"struct.a::c", %"struct.a::c"* %h, i64 0, i32 2
    %v3 = load i8*, i8** %e, align 8
    %e2 = getelementptr inbounds %"struct.a::c", %"struct.a::c"* %i, i64 0, i32 2
    %v4 = load i8*, i8** %e2, align 8
    %cmp = icmp eq i8* %v3, %v4
    br i1 %cmp, label %land.lhs.true, label %land.end

  land.lhs.true:                                    ; preds = %entry
    %d = getelementptr inbounds %"struct.a::c", %"struct.a::c"* %h, i64 0, i32 1
    %v5 = load i32*, i32** %d, align 8
    %d3 = getelementptr inbounds %"struct.a::c", %"struct.a::c"* %i, i64 0, i32 1
    %v6 = load i32*, i32** %d3, align 8
    %cmp4 = icmp eq i32* %v5, %v6
    br i1 %cmp4, label %land.rhs, label %land.end

  land.rhs:                                         ; preds = %land.lhs.true
    %j = getelementptr inbounds %"struct.a::c", %"struct.a::c"* %h, i64 0, i32 0
    %v7 = load i32, i32* %j, align 8
    %j5 = getelementptr inbounds %"struct.a::c", %"struct.a::c"* %i, i64 0, i32 0
    %v8 = load i32, i32* %j5, align 8
    %cmp6 = icmp eq i32 %v7, %v8
    br label %land.end

  land.end:                                         ; preds = %land.rhs, %land.lhs.true, %entry
    %v9 = phi i1 [ false, %land.lhs.true ], [ false, %entry ], [ %cmp6, %land.rhs ]
    ret i1 %v9
  }

  declare void @init(%"struct.a::c"*)

The result of the transform looks like this:

  define i1 @test() {
  land.rhs2:
    %0 = getelementptr inbounds %"struct.a::c", %"struct.a::c"* %h, i64 0, i32 0
    %1 = getelementptr inbounds %"struct.a::c", %"struct.a::c"* %i, i64 0, i32 0
    %2 = load i32, i32* %0, align 4
    %3 = load i32, i32* %1, align 4
    %4 = icmp eq i32 %2, %3
    br i1 %4, label %"land.lhs.true+entry", label %land.end

  "land.lhs.true+entry":                            ; preds = %land.rhs2
    %h = alloca %"struct.a::c", align 8
    %i = alloca %"struct.a::c", align 8
    %5 = getelementptr inbounds %"struct.a::c", %"struct.a::c"* %h, i64 0, i32 1
    %6 = getelementptr inbounds %"struct.a::c", %"struct.a::c"* %i, i64 0, i32 1
    %cstr = bitcast i32** %5 to i8*
    %cstr1 = bitcast i32** %6 to i8*
    %memcmp = call i32 @memcmp(i8* %cstr, i8* %cstr1, i64 16)
    %7 = icmp eq i32 %memcmp, 0
    br label %land.end

  land.end:                                         ; preds = %land.rhs2, %"land.lhs.true+entry"
    %v9 = phi i1 [ %7, %"land.lhs.true+entry" ], [ false, %land.rhs2 ]
    ret i1 %v9
  }

The comparison at offset 0 doesn't get merged as it's non-contiguous, but for some reason it also gets moved to the start, such that the allocas are effectively sunk,

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D108782/new/

https://reviews.llvm.org/D108782