[PATCH] D28721: [NVPTX] Improve lowering of llvm.ctpop.
Justin Lebar via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 17 16:19:30 PST 2017
This revision was automatically updated to reflect the committed changes.
Closed by commit rL292302: [NVPTX] Improve lowering of llvm.ctpop. (authored by jlebar).
Changed prior to commit:
https://reviews.llvm.org/D28721?vs=84423&id=84769#toc
Repository:
rL LLVM
https://reviews.llvm.org/D28721
Files:
llvm/trunk/lib/Target/NVPTX/NVPTXInstrInfo.td
llvm/trunk/test/CodeGen/NVPTX/intrinsics.ll
Index: llvm/trunk/lib/Target/NVPTX/NVPTXInstrInfo.td
===================================================================
--- llvm/trunk/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ llvm/trunk/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -2822,15 +2822,19 @@
// 32-bit has a direct PTX instruction
def : Pat<(ctpop Int32Regs:$a), (POPCr32 Int32Regs:$a)>;
-// For 64-bit, the result in PTX is actually 32-bit so we zero-extend
-// to 64-bit to match the LLVM semantics
+// For 64-bit, the result in PTX is actually 32-bit so we zero-extend to 64-bit
+// to match the LLVM semantics. Just as with ctlz.i64, we provide a second
+// pattern that avoids the type conversion if we're truncating the result to
+// i32 anyway.
def : Pat<(ctpop Int64Regs:$a), (CVT_u64_u32 (POPCr64 Int64Regs:$a), CvtNONE)>;
+def : Pat<(i32 (trunc (ctpop Int64Regs:$a))), (POPCr64 Int64Regs:$a)>;
-// For 16-bit, we zero-extend to 32-bit, then trunc the result back
-// to 16-bits (ctpop of a 16-bit value is guaranteed to require less
-// than 16 bits to store)
+// For 16-bit, we zero-extend to 32-bit, then trunc the result back to 16 bits.
+// If we know that we're storing into an i32, we can avoid the final trunc.
def : Pat<(ctpop Int16Regs:$a),
(CVT_u16_u32 (POPCr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)), CvtNONE)>;
+def : Pat<(i32 (zext (ctpop Int16Regs:$a))),
+ (POPCr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE))>;
// fpround f32 -> f16
def : Pat<(f16 (fpround Float32Regs:$a)),
Index: llvm/trunk/test/CodeGen/NVPTX/intrinsics.ll
===================================================================
--- llvm/trunk/test/CodeGen/NVPTX/intrinsics.ll
+++ llvm/trunk/test/CodeGen/NVPTX/intrinsics.ll
@@ -36,8 +36,62 @@
ret i64 %val
}
+; CHECK-LABEL: test_popc32(
+define i32 @test_popc32(i32 %a) {
+; CHECK: popc.b32
+ %val = call i32 @llvm.ctpop.i32(i32 %a)
+ ret i32 %val
+}
+
+; CHECK-LABEL: test_popc64
+define i64 @test_popc64(i64 %a) {
+; CHECK: popc.b64
+; CHECK: cvt.u64.u32
+ %val = call i64 @llvm.ctpop.i64(i64 %a)
+ ret i64 %val
+}
+
+; NVPTX popc.b64 returns an i32 even though @llvm.ctpop.i64 returns an i64, so
+; if this function returns an i32, there's no need to do any type conversions
+; in the ptx.
+; CHECK-LABEL: test_popc64_trunc
+define i32 @test_popc64_trunc(i64 %a) {
+; CHECK: popc.b64
+; CHECK-NOT: cvt.
+ %val = call i64 @llvm.ctpop.i64(i64 %a)
+ %trunc = trunc i64 %val to i32
+ ret i32 %trunc
+}
+
+; llvm.ctpop.i16 is implemented by converting to i32, running popc.b32, and
+; then converting back to i16.
+; CHECK-LABEL: test_popc16
+define void @test_popc16(i16 %a, i16* %b) {
+; CHECK: cvt.u32.u16
+; CHECK: popc.b32
+; CHECK: cvt.u16.u32
+ %val = call i16 @llvm.ctpop.i16(i16 %a)
+ store i16 %val, i16* %b
+ ret void
+}
+
+; If we call llvm.ctpop.i16 and then zext the result to i32, we shouldn't need
+; to do any conversions after calling popc.b32, because that returns an i32.
+; CHECK-LABEL: test_popc16_to_32
+define i32 @test_popc16_to_32(i16 %a) {
+; CHECK: cvt.u32.u16
+; CHECK: popc.b32
+; CHECK-NOT: cvt.
+ %val = call i16 @llvm.ctpop.i16(i16 %a)
+ %zext = zext i16 %val to i32
+ ret i32 %zext
+}
+
declare float @llvm.fabs.f32(float)
declare double @llvm.fabs.f64(double)
declare float @llvm.nvvm.sqrt.f(float)
declare i32 @llvm.bitreverse.i32(i32)
declare i64 @llvm.bitreverse.i64(i64)
+declare i16 @llvm.ctpop.i16(i16)
+declare i32 @llvm.ctpop.i32(i32)
+declare i64 @llvm.ctpop.i64(i64)
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D28721.84769.patch
Type: text/x-patch
Size: 3454 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20170118/6a8012ec/attachment.bin>
More information about the llvm-commits
mailing list