[PATCH] D83139: [InstCombine] Always try to invert non-canonical predicate of an icmp
Roman Lebedev via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Sat Jul 4 03:45:44 PDT 2020
lebedev.ri added a comment.
For example, we could end up with the following after running `-alloca-promotion-coercion -mem2reg` on https://godbolt.org/z/bwuEmJ:
*** IR Dump After Promote Memory to Register *** (function: _Z4loopi)
; ModuleID = '/tmp/test.cpp'
source_filename = "/tmp/test.cpp"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; Function Attrs: uwtable
define dso_local void @_Z4loopi(i32 %width) local_unnamed_addr #0 {
entry:
%0 = shufflevector <8 x i8> undef, <8 x i8> zeroinitializer, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
br label %for.cond
for.cond: ; preds = %for.body, %entry
%storage.apc.retyped.0 = phi <8 x i8> [ %0, %entry ], [ %10, %for.body ]
%i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
%1 = mul i32 %i.0, -1
%2 = trunc i32 %1 to i1
%3 = zext i1 %2 to i64
%cmp = icmp ne i32 %i.0, %width
br i1 %cmp, label %for.body, label %for.cond.cleanup
for.cond.cleanup: ; preds = %for.cond
ret void
for.body: ; preds = %for.cond
%4 = bitcast <8 x i8> %storage.apc.retyped.0 to <2 x i32>
%5 = extractelement <2 x i32> %4, i64 %3
%call = call i32 @_Z3adji(i32 %5)
%6 = bitcast <8 x i8> %storage.apc.retyped.0 to <2 x i32>
%7 = extractelement <2 x i32> %6, i64 %3
%add = add nsw i32 %7, %call
%8 = bitcast <8 x i8> %storage.apc.retyped.0 to <2 x i32>
%9 = insertelement <2 x i32> %8, i32 %add, i64 %3
%10 = bitcast <2 x i32> %9 to <8 x i8>
%inc = add nsw i32 %i.0, 1
br label %for.cond
}
; Function Attrs: argmemonly nounwind willreturn
declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1
; Function Attrs: argmemonly nounwind willreturn writeonly
declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #2
declare dso_local i32 @_Z3adji(i32) local_unnamed_addr #3
; Function Attrs: argmemonly nounwind willreturn
declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1
attributes #0 = { uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { argmemonly nounwind willreturn }
attributes #2 = { argmemonly nounwind willreturn writeonly }
attributes #3 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
!llvm.module.flags = !{!0}
!llvm.ident = !{!1}
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{!"clang version 11.0.0 (git at github.com:LebedevRI/llvm-project.git ff1dbd7ce139b7769158065864ea2615b38f3e16)"}
Then `-instcombine -scalarizer` helps clean it up:
$ ./bin/opt /tmp/test.ll -instcombine -scalarizer -o - -S
; ModuleID = '/tmp/test.ll'
source_filename = "/tmp/test.cpp"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; Function Attrs: uwtable
define dso_local void @_Z4loopi(i32 %width) local_unnamed_addr #0 {
entry:
br label %for.cond
for.cond: ; preds = %for.body, %entry
%.i0 = phi i32 [ 0, %entry ], [ %.i01, %for.body ]
%.i1 = phi i32 [ 0, %entry ], [ %.i12, %for.body ]
%i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
%0 = and i32 %i.0, 1
%1 = zext i32 %0 to i64
%cmp = icmp eq i32 %i.0, %width
br i1 %cmp, label %for.cond.cleanup, label %for.body
for.cond.cleanup: ; preds = %for.cond
ret void
for.body: ; preds = %for.cond
%.is.0 = icmp eq i64 %1, 0
%.upto0 = select i1 %.is.0, i32 %.i0, i32 undef
%.is.1 = icmp eq i64 %1, 1
%2 = select i1 %.is.1, i32 %.i1, i32 %.upto0
%call = call i32 @_Z3adji(i32 %2)
%.is.03 = icmp eq i64 %1, 0
%.upto04 = select i1 %.is.03, i32 %.i0, i32 undef
%.is.15 = icmp eq i64 %1, 1
%3 = select i1 %.is.15, i32 %.i1, i32 %.upto04
%add = add nsw i32 %3, %call
%.is.07 = icmp eq i64 %1, 0
%.i01 = select i1 %.is.07, i32 %add, i32 %.i0
%.is.19 = icmp eq i64 %1, 1
%.i12 = select i1 %.is.19, i32 %add, i32 %.i1
%inc = add nuw nsw i32 %i.0, 1
br label %for.cond
}
; Function Attrs: argmemonly nounwind willreturn
declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1
; Function Attrs: argmemonly nounwind willreturn writeonly
declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #2
declare dso_local i32 @_Z3adji(i32) local_unnamed_addr #3
; Function Attrs: argmemonly nounwind willreturn
declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1
attributes #0 = { uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { argmemonly nounwind willreturn }
attributes #2 = { argmemonly nounwind willreturn writeonly }
attributes #3 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
!llvm.module.flags = !{!0}
!llvm.ident = !{!1}
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{!"clang version 11.0.0 (git at github.com:LebedevRI/llvm-project.git ff1dbd7ce139b7769158065864ea2615b38f3e16)"}
We now have both `icmp eq i64 %1, 0` and `icmp eq i64 %1, 1`. What happens next depends on the exact pass ordering.
If we happen to run EarlyCSE first, we'd still be stuck with them:
$ opt-11 /tmp/test.ll -early-cse -instcombine -o - -S
; ModuleID = '/tmp/test.ll'
source_filename = "/tmp/test.cpp"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; Function Attrs: uwtable
define dso_local void @_Z4loopi(i32 %width) local_unnamed_addr #0 {
entry:
br label %for.cond
for.cond: ; preds = %for.body, %entry
%.i0 = phi i32 [ 0, %entry ], [ %.i01, %for.body ]
%.i1 = phi i32 [ 0, %entry ], [ %.i12, %for.body ]
%i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
%0 = and i32 %i.0, 1
%cmp = icmp eq i32 %i.0, %width
br i1 %cmp, label %for.cond.cleanup, label %for.body
for.cond.cleanup: ; preds = %for.cond
ret void
for.body: ; preds = %for.cond
%.is.0 = icmp eq i32 %0, 0
%.is.1 = icmp ne i32 %0, 0
%1 = select i1 %.is.1, i32 %.i1, i32 %.i0
%call = call i32 @_Z3adji(i32 %1)
%add = add nsw i32 %1, %call
%.i01 = select i1 %.is.0, i32 %add, i32 %.i0
%.i12 = select i1 %.is.1, i32 %add, i32 %.i1
%inc = add nuw nsw i32 %i.0, 1
br label %for.cond
}
; Function Attrs: argmemonly nounwind willreturn
declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1
; Function Attrs: argmemonly nounwind willreturn writeonly
declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #2
declare dso_local i32 @_Z3adji(i32) local_unnamed_addr #3
; Function Attrs: argmemonly nounwind willreturn
declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1
attributes #0 = { uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { argmemonly nounwind willreturn }
attributes #2 = { argmemonly nounwind willreturn writeonly }
attributes #3 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
!llvm.module.flags = !{!0}
!llvm.ident = !{!1}
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{!"clang version 11.0.0 (git at github.com:LebedevRI/llvm-project.git ff1dbd7ce139b7769158065864ea2615b38f3e16)"}
But it would be fine if we ran InstCombine before EarlyCSE instead:
$ opt-11 /tmp/test.ll -instcombine -early-cse -o - -S
; ModuleID = '/tmp/test.ll'
source_filename = "/tmp/test.cpp"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; Function Attrs: uwtable
define dso_local void @_Z4loopi(i32 %width) local_unnamed_addr #0 {
entry:
br label %for.cond
for.cond: ; preds = %for.body, %entry
%.i0 = phi i32 [ 0, %entry ], [ %.i01, %for.body ]
%.i1 = phi i32 [ 0, %entry ], [ %.i12, %for.body ]
%i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
%0 = and i32 %i.0, 1
%cmp = icmp eq i32 %i.0, %width
br i1 %cmp, label %for.cond.cleanup, label %for.body
for.cond.cleanup: ; preds = %for.cond
ret void
for.body: ; preds = %for.cond
%.is.1 = icmp eq i32 %0, 0
%1 = select i1 %.is.1, i32 %.i0, i32 %.i1
%call = call i32 @_Z3adji(i32 %1)
%add = add nsw i32 %1, %call
%.i01 = select i1 %.is.1, i32 %add, i32 %.i0
%.i12 = select i1 %.is.1, i32 %.i1, i32 %add
%inc = add nuw nsw i32 %i.0, 1
br label %for.cond
}
; Function Attrs: argmemonly nounwind willreturn
declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1
; Function Attrs: argmemonly nounwind willreturn writeonly
declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #2
declare dso_local i32 @_Z3adji(i32) local_unnamed_addr #3
; Function Attrs: argmemonly nounwind willreturn
declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1
attributes #0 = { uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { argmemonly nounwind willreturn }
attributes #2 = { argmemonly nounwind willreturn writeonly }
attributes #3 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
!llvm.module.flags = !{!0}
!llvm.ident = !{!1}
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{!"clang version 11.0.0 (git at github.com:LebedevRI/llvm-project.git ff1dbd7ce139b7769158065864ea2615b38f3e16)"}
And with this patch, we get a good result regardless of the pass ordering:
$ ./bin/opt /tmp/test.ll -early-cse -instcombine -o - -S
; ModuleID = '/tmp/test.ll'
source_filename = "/tmp/test.cpp"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; Function Attrs: uwtable
define dso_local void @_Z4loopi(i32 %width) local_unnamed_addr #0 {
entry:
br label %for.cond
for.cond: ; preds = %for.body, %entry
%.i0 = phi i32 [ 0, %entry ], [ %.i01, %for.body ]
%.i1 = phi i32 [ 0, %entry ], [ %.i12, %for.body ]
%i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
%0 = and i32 %i.0, 1
%cmp = icmp eq i32 %i.0, %width
br i1 %cmp, label %for.cond.cleanup, label %for.body
for.cond.cleanup: ; preds = %for.cond
ret void
for.body: ; preds = %for.cond
%.is.0 = icmp eq i32 %0, 0
%.is.1.not = icmp eq i32 %0, 0
%1 = select i1 %.is.1.not, i32 %.i0, i32 %.i1
%call = call i32 @_Z3adji(i32 %1)
%add = add nsw i32 %1, %call
%.i01 = select i1 %.is.0, i32 %add, i32 %.i0
%.i12 = select i1 %.is.1.not, i32 %.i1, i32 %add
%inc = add nuw nsw i32 %i.0, 1
br label %for.cond
}
; Function Attrs: argmemonly nounwind willreturn
declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1
; Function Attrs: argmemonly nounwind willreturn writeonly
declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #2
declare dso_local i32 @_Z3adji(i32) local_unnamed_addr #3
; Function Attrs: argmemonly nounwind willreturn
declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1
attributes #0 = { uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { argmemonly nounwind willreturn }
attributes #2 = { argmemonly nounwind willreturn writeonly }
attributes #3 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
!llvm.module.flags = !{!0}
!llvm.ident = !{!1}
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{!"clang version 11.0.0 (git at github.com:LebedevRI/llvm-project.git ff1dbd7ce139b7769158065864ea2615b38f3e16)"}
which is good, and just needs one more `-early-cse` run to merge the two now-identical `icmp eq i32 %0, 0` instructions.
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D83139/new/
https://reviews.llvm.org/D83139
More information about the llvm-commits
mailing list