[PATCH] D37121: [DivRemHoist] add a pass to move div/rem pairs into the same block (PR31028)
Justin Lebar via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Sun Mar 4 18:57:11 PST 2018
jlebar added a comment.
In https://reviews.llvm.org/D37121#1025333, @spatel wrote:
> Can you post an IR example or file a bug that shows the failure? If BypassSlowDivision can get it, but InstCombine cannot, then the difference comes down to using computeKnownBits?
Sure, something like the following input IR:
target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
target triple = "nvptx64-nvidia-cuda"
define void @foo(i64 %a, i64* %ptr1, i64* %ptr2) {
%b = and i64 %a, 65535
%div = udiv i64 %b, 42
%rem = urem i64 %b, 42
store i64 %div, i64* %ptr1
store i64 %rem, i64* %ptr2
ret void
}
$ opt -codegenprepare -S
define void @foo(i64 %a, i64* %ptr1, i64* %ptr2) {
%b = and i64 %a, 65535
%1 = trunc i64 %b to i32
%2 = udiv i32 %1, 42
%3 = urem i32 %1, 42
%4 = zext i32 %2 to i64
%5 = zext i32 %3 to i64
store i64 %4, i64* %ptr1
store i64 %5, i64* %ptr2
ret void
}
$ opt -codegenprepare -S | llc
[snip]
shr.u32 %r2, %r1, 1;
mul.wide.u32 %rd3, %r2, 818089009;
shr.u64 %rd4, %rd3, 34;
cvt.u32.u64 %r3, %rd4;
mul.lo.s32 %r4, %r3, 42;
sub.s32 %r5, %r1, %r4;
$ opt -div-rem-pairs -codegenprepare -S
define void @foo(i64 %a, i64* %ptr1, i64* %ptr2) {
%b = and i64 %a, 65535
%1 = trunc i64 %b to i32
%2 = udiv i32 %1, 42
%3 = zext i32 %2 to i64
%4 = mul i64 %3, 42
%5 = sub i64 %b, %4
store i64 %3, i64* %ptr1
store i64 %5, i64* %ptr2
ret void
}
$ opt -div-rem-pairs -codegenprepare -S | llc
[snip]
shr.u32 %r2, %r1, 1;
mul.wide.u32 %rd4, %r2, 818089009;
shr.u64 %rd5, %rd4, 34;
cvt.u32.u64 %r3, %rd5;
mul.wide.u32 %rd6, %r3, 42;
sub.s64 %rd7, %rd1, %rd6;
Repository:
rL LLVM
https://reviews.llvm.org/D37121
More information about the llvm-commits mailing list