[PATCH] D83139: [InstCombine] Always try to invert non-canonical predicate of an icmp

Roman Lebedev via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Sat Jul 4 03:45:44 PDT 2020


lebedev.ri added a comment.

For example, we could end up with the following after `-alloca-promotion-coercion -mem2reg` on https://godbolt.org/z/bwuEmJ

  *** IR Dump After Promote Memory to Register *** (function: _Z4loopi)
  ; ModuleID = '/tmp/test.cpp'
  source_filename = "/tmp/test.cpp"
  target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
  target triple = "x86_64-unknown-linux-gnu"
  
  ; Function Attrs: uwtable
  define dso_local void @_Z4loopi(i32 %width) local_unnamed_addr #0 {
  entry:
    %0 = shufflevector <8 x i8> undef, <8 x i8> zeroinitializer, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
    br label %for.cond
  
  for.cond:                                         ; preds = %for.body, %entry
    %storage.apc.retyped.0 = phi <8 x i8> [ %0, %entry ], [ %10, %for.body ]
    %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
    %1 = mul i32 %i.0, -1
    %2 = trunc i32 %1 to i1
    %3 = zext i1 %2 to i64
    %cmp = icmp ne i32 %i.0, %width
    br i1 %cmp, label %for.body, label %for.cond.cleanup
  
  for.cond.cleanup:                                 ; preds = %for.cond
    ret void
  
  for.body:                                         ; preds = %for.cond
    %4 = bitcast <8 x i8> %storage.apc.retyped.0 to <2 x i32>
    %5 = extractelement <2 x i32> %4, i64 %3
    %call = call i32 @_Z3adji(i32 %5)
    %6 = bitcast <8 x i8> %storage.apc.retyped.0 to <2 x i32>
    %7 = extractelement <2 x i32> %6, i64 %3
    %add = add nsw i32 %7, %call
    %8 = bitcast <8 x i8> %storage.apc.retyped.0 to <2 x i32>
    %9 = insertelement <2 x i32> %8, i32 %add, i64 %3
    %10 = bitcast <2 x i32> %9 to <8 x i8>
    %inc = add nsw i32 %i.0, 1
    br label %for.cond
  }
  
  ; Function Attrs: argmemonly nounwind willreturn
  declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1
  
  ; Function Attrs: argmemonly nounwind willreturn writeonly
  declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #2
  
  declare dso_local i32 @_Z3adji(i32) local_unnamed_addr #3
  
  ; Function Attrs: argmemonly nounwind willreturn
  declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1
  
  attributes #0 = { uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
  attributes #1 = { argmemonly nounwind willreturn }
  attributes #2 = { argmemonly nounwind willreturn writeonly }
  attributes #3 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
  
  !llvm.module.flags = !{!0}
  !llvm.ident = !{!1}
  
  !0 = !{i32 1, !"wchar_size", i32 4}
  !1 = !{!"clang version 11.0.0 (git at github.com:LebedevRI/llvm-project.git ff1dbd7ce139b7769158065864ea2615b38f3e16)"}

Then `-instcombine -scalarizer` helps clean it up:

  $ ./bin/opt /tmp/test.ll -instcombine -scalarizer -o - -S
  ; ModuleID = '/tmp/test.ll'
  source_filename = "/tmp/test.cpp"
  target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
  target triple = "x86_64-unknown-linux-gnu"
  
  ; Function Attrs: uwtable
  define dso_local void @_Z4loopi(i32 %width) local_unnamed_addr #0 {
  entry:
    br label %for.cond
  
  for.cond:                                         ; preds = %for.body, %entry
    %.i0 = phi i32 [ 0, %entry ], [ %.i01, %for.body ]
    %.i1 = phi i32 [ 0, %entry ], [ %.i12, %for.body ]
    %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
    %0 = and i32 %i.0, 1
    %1 = zext i32 %0 to i64
    %cmp = icmp eq i32 %i.0, %width
    br i1 %cmp, label %for.cond.cleanup, label %for.body
  
  for.cond.cleanup:                                 ; preds = %for.cond
    ret void
  
  for.body:                                         ; preds = %for.cond
    %.is.0 = icmp eq i64 %1, 0
    %.upto0 = select i1 %.is.0, i32 %.i0, i32 undef
    %.is.1 = icmp eq i64 %1, 1
    %2 = select i1 %.is.1, i32 %.i1, i32 %.upto0
    %call = call i32 @_Z3adji(i32 %2)
    %.is.03 = icmp eq i64 %1, 0
    %.upto04 = select i1 %.is.03, i32 %.i0, i32 undef
    %.is.15 = icmp eq i64 %1, 1
    %3 = select i1 %.is.15, i32 %.i1, i32 %.upto04
    %add = add nsw i32 %3, %call
    %.is.07 = icmp eq i64 %1, 0
    %.i01 = select i1 %.is.07, i32 %add, i32 %.i0
    %.is.19 = icmp eq i64 %1, 1
    %.i12 = select i1 %.is.19, i32 %add, i32 %.i1
    %inc = add nuw nsw i32 %i.0, 1
    br label %for.cond
  }
  
  ; Function Attrs: argmemonly nounwind willreturn
  declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1
  
  ; Function Attrs: argmemonly nounwind willreturn writeonly
  declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #2
  
  declare dso_local i32 @_Z3adji(i32) local_unnamed_addr #3
  
  ; Function Attrs: argmemonly nounwind willreturn
  declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1
  
  attributes #0 = { uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
  attributes #1 = { argmemonly nounwind willreturn }
  attributes #2 = { argmemonly nounwind willreturn writeonly }
  attributes #3 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
  
  !llvm.module.flags = !{!0}
  !llvm.ident = !{!1}
  
  !0 = !{i32 1, !"wchar_size", i32 4}
  !1 = !{!"clang version 11.0.0 (git at github.com:LebedevRI/llvm-project.git ff1dbd7ce139b7769158065864ea2615b38f3e16)"}

We now have both `icmp eq i64 %1, 0` and `icmp eq i64 %1, 1`. What happens next depends on the exact pass ordering.
If we happen to run EarlyCSE before InstCombine, we are still stuck with two distinct compares (one `eq`, one `ne`):

  $ opt-11 /tmp/test.ll -early-cse -instcombine -o - -S
  ; ModuleID = '/tmp/test.ll'
  source_filename = "/tmp/test.cpp"
  target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
  target triple = "x86_64-unknown-linux-gnu"
  
  ; Function Attrs: uwtable
  define dso_local void @_Z4loopi(i32 %width) local_unnamed_addr #0 {
  entry:
    br label %for.cond
  
  for.cond:                                         ; preds = %for.body, %entry
    %.i0 = phi i32 [ 0, %entry ], [ %.i01, %for.body ]
    %.i1 = phi i32 [ 0, %entry ], [ %.i12, %for.body ]
    %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
    %0 = and i32 %i.0, 1
    %cmp = icmp eq i32 %i.0, %width
    br i1 %cmp, label %for.cond.cleanup, label %for.body
  
  for.cond.cleanup:                                 ; preds = %for.cond
    ret void
  
  for.body:                                         ; preds = %for.cond
    %.is.0 = icmp eq i32 %0, 0
    %.is.1 = icmp ne i32 %0, 0
    %1 = select i1 %.is.1, i32 %.i1, i32 %.i0
    %call = call i32 @_Z3adji(i32 %1)
    %add = add nsw i32 %1, %call
    %.i01 = select i1 %.is.0, i32 %add, i32 %.i0
    %.i12 = select i1 %.is.1, i32 %add, i32 %.i1
    %inc = add nuw nsw i32 %i.0, 1
    br label %for.cond
  }
  
  ; Function Attrs: argmemonly nounwind willreturn
  declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1
  
  ; Function Attrs: argmemonly nounwind willreturn writeonly
  declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #2
  
  declare dso_local i32 @_Z3adji(i32) local_unnamed_addr #3
  
  ; Function Attrs: argmemonly nounwind willreturn
  declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1
  
  attributes #0 = { uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
  attributes #1 = { argmemonly nounwind willreturn }
  attributes #2 = { argmemonly nounwind willreturn writeonly }
  attributes #3 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
  
  !llvm.module.flags = !{!0}
  !llvm.ident = !{!1}
  
  !0 = !{i32 1, !"wchar_size", i32 4}
  !1 = !{!"clang version 11.0.0 (git at github.com:LebedevRI/llvm-project.git ff1dbd7ce139b7769158065864ea2615b38f3e16)"}

But it would be fine if we ran InstCombine before EarlyCSE instead:

  $ opt-11 /tmp/test.ll -instcombine -early-cse -o - -S
  ; ModuleID = '/tmp/test.ll'
  source_filename = "/tmp/test.cpp"
  target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
  target triple = "x86_64-unknown-linux-gnu"
  
  ; Function Attrs: uwtable
  define dso_local void @_Z4loopi(i32 %width) local_unnamed_addr #0 {
  entry:
    br label %for.cond
  
  for.cond:                                         ; preds = %for.body, %entry
    %.i0 = phi i32 [ 0, %entry ], [ %.i01, %for.body ]
    %.i1 = phi i32 [ 0, %entry ], [ %.i12, %for.body ]
    %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
    %0 = and i32 %i.0, 1
    %cmp = icmp eq i32 %i.0, %width
    br i1 %cmp, label %for.cond.cleanup, label %for.body
  
  for.cond.cleanup:                                 ; preds = %for.cond
    ret void
  
  for.body:                                         ; preds = %for.cond
    %.is.1 = icmp eq i32 %0, 0
    %1 = select i1 %.is.1, i32 %.i0, i32 %.i1
    %call = call i32 @_Z3adji(i32 %1)
    %add = add nsw i32 %1, %call
    %.i01 = select i1 %.is.1, i32 %add, i32 %.i0
    %.i12 = select i1 %.is.1, i32 %.i1, i32 %add
    %inc = add nuw nsw i32 %i.0, 1
    br label %for.cond
  }
  
  ; Function Attrs: argmemonly nounwind willreturn
  declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1
  
  ; Function Attrs: argmemonly nounwind willreturn writeonly
  declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #2
  
  declare dso_local i32 @_Z3adji(i32) local_unnamed_addr #3
  
  ; Function Attrs: argmemonly nounwind willreturn
  declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1
  
  attributes #0 = { uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
  attributes #1 = { argmemonly nounwind willreturn }
  attributes #2 = { argmemonly nounwind willreturn writeonly }
  attributes #3 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
  
  !llvm.module.flags = !{!0}
  !llvm.ident = !{!1}
  
  !0 = !{i32 1, !"wchar_size", i32 4}
  !1 = !{!"clang version 11.0.0 (git at github.com:LebedevRI/llvm-project.git ff1dbd7ce139b7769158065864ea2615b38f3e16)"}

And with this patch, we get a good result regardless of the pass ordering:

  $ ./bin/opt /tmp/test.ll -early-cse -instcombine -o - -S
  ; ModuleID = '/tmp/test.ll'
  source_filename = "/tmp/test.cpp"
  target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
  target triple = "x86_64-unknown-linux-gnu"
  
  ; Function Attrs: uwtable
  define dso_local void @_Z4loopi(i32 %width) local_unnamed_addr #0 {
  entry:
    br label %for.cond
  
  for.cond:                                         ; preds = %for.body, %entry
    %.i0 = phi i32 [ 0, %entry ], [ %.i01, %for.body ]
    %.i1 = phi i32 [ 0, %entry ], [ %.i12, %for.body ]
    %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
    %0 = and i32 %i.0, 1
    %cmp = icmp eq i32 %i.0, %width
    br i1 %cmp, label %for.cond.cleanup, label %for.body
  
  for.cond.cleanup:                                 ; preds = %for.cond
    ret void
  
  for.body:                                         ; preds = %for.cond
    %.is.0 = icmp eq i32 %0, 0
    %.is.1.not = icmp eq i32 %0, 0
    %1 = select i1 %.is.1.not, i32 %.i0, i32 %.i1
    %call = call i32 @_Z3adji(i32 %1)
    %add = add nsw i32 %1, %call
    %.i01 = select i1 %.is.0, i32 %add, i32 %.i0
    %.i12 = select i1 %.is.1.not, i32 %.i1, i32 %add
    %inc = add nuw nsw i32 %i.0, 1
    br label %for.cond
  }
  
  ; Function Attrs: argmemonly nounwind willreturn
  declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1
  
  ; Function Attrs: argmemonly nounwind willreturn writeonly
  declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #2
  
  declare dso_local i32 @_Z3adji(i32) local_unnamed_addr #3
  
  ; Function Attrs: argmemonly nounwind willreturn
  declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1
  
  attributes #0 = { uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
  attributes #1 = { argmemonly nounwind willreturn }
  attributes #2 = { argmemonly nounwind willreturn writeonly }
  attributes #3 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
  
  !llvm.module.flags = !{!0}
  !llvm.ident = !{!1}
  
  !0 = !{i32 1, !"wchar_size", i32 4}
  !1 = !{!"clang version 11.0.0 (git at github.com:LebedevRI/llvm-project.git ff1dbd7ce139b7769158065864ea2615b38f3e16)"}

which is good: both compares are now the identical `icmp eq i32 %0, 0`, so it just needs one more `-early-cse` run to merge them.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D83139/new/

https://reviews.llvm.org/D83139





More information about the llvm-commits mailing list