[PATCH] D11926: [NVPTX] Use 32-bit divides instead of 64-bit divides where possible
Mark Heffernan via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 11 12:17:46 PDT 2015
meheff updated this revision to Diff 31848.
meheff added a comment.
Test added. Unfortunately, the stand-alone Eigen3 benchmarks don't show much improvement with this patch because, I believe, they use 32-bit indices throughout. Where we see the huge speedup is in the larger-scale benchmarks using Eigen with 64-bit indices.
http://reviews.llvm.org/D11926
Files:
lib/Target/NVPTX/NVPTXISelLowering.cpp
test/CodeGen/NVPTX/bypass-div.ll
Index: test/CodeGen/NVPTX/bypass-div.ll
===================================================================
--- test/CodeGen/NVPTX/bypass-div.ll
+++ test/CodeGen/NVPTX/bypass-div.ll
@@ -0,0 +1,80 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_35 | FileCheck %s
+
+; 64-bit divides and rems should be split into a fast and slow path where
+; the fast path uses a 32-bit operation.
+
+define void @sdiv64(i64 %a, i64 %b, i64* %retptr) {
+; CHECK-LABEL: sdiv64(
+; CHECK: div.s64
+; CHECK: div.u32
+; CHECK: ret
+ %d = sdiv i64 %a, %b
+ store i64 %d, i64* %retptr
+ ret void
+}
+
+define void @udiv64(i64 %a, i64 %b, i64* %retptr) {
+; CHECK-LABEL: udiv64(
+; CHECK: div.u64
+; CHECK: div.u32
+; CHECK: ret
+ %d = udiv i64 %a, %b
+ store i64 %d, i64* %retptr
+ ret void
+}
+
+define void @srem64(i64 %a, i64 %b, i64* %retptr) {
+; CHECK-LABEL: srem64(
+; CHECK: rem.s64
+; CHECK: rem.u32
+; CHECK: ret
+ %d = srem i64 %a, %b
+ store i64 %d, i64* %retptr
+ ret void
+}
+
+define void @urem64(i64 %a, i64 %b, i64* %retptr) {
+; CHECK-LABEL: urem64(
+; CHECK: rem.u64
+; CHECK: rem.u32
+; CHECK: ret
+ %d = urem i64 %a, %b
+ store i64 %d, i64* %retptr
+ ret void
+}
+
+define void @sdiv32(i32 %a, i32 %b, i32* %retptr) {
+; CHECK-LABEL: sdiv32(
+; CHECK: div.s32
+; CHECK-NOT: div.
+ %d = sdiv i32 %a, %b
+ store i32 %d, i32* %retptr
+ ret void
+}
+
+define void @udiv32(i32 %a, i32 %b, i32* %retptr) {
+; CHECK-LABEL: udiv32(
+; CHECK: div.u32
+; CHECK-NOT: div.
+ %d = udiv i32 %a, %b
+ store i32 %d, i32* %retptr
+ ret void
+}
+
+define void @srem32(i32 %a, i32 %b, i32* %retptr) {
+; CHECK-LABEL: srem32(
+; CHECK: rem.s32
+; CHECK-NOT: rem.
+ %d = srem i32 %a, %b
+ store i32 %d, i32* %retptr
+ ret void
+}
+
+define void @urem32(i32 %a, i32 %b, i32* %retptr) {
+; CHECK-LABEL: urem32(
+; CHECK: rem.u32
+; CHECK-NOT: rem.
+ %d = urem i32 %a, %b
+ store i32 %d, i32* %retptr
+ ret void
+}
Index: lib/Target/NVPTX/NVPTXISelLowering.cpp
===================================================================
--- lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -124,6 +124,10 @@
// condition branches.
setJumpIsExpensive(true);
+ // Wide divides are _very_ slow. Try to reduce the width of the divide if
+ // possible.
+ addBypassSlowDiv(64, 32);
+
// By default, use the Source scheduling
if (sched4reg)
setSchedulingPreference(Sched::RegPressure);
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D11926.31848.patch
Type: text/x-patch
Size: 2505 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20150811/b86f9ce0/attachment.bin>
More information about the llvm-commits
mailing list