[PATCH] D25896: [PowerPC] - No SExt/ZExt needed for count trailing zeros
Nemanja Ivanovic via llvm-commits
llvm-commits at lists.llvm.org
Sat Oct 22 05:00:25 PDT 2016
nemanjai created this revision.
nemanjai added reviewers: hfinkel, amehsan, kbarton.
nemanjai added a subscriber: llvm-commits.
nemanjai set the repository for this revision to rL LLVM.
Power9 provides the cnttzw instruction. However, in 64-bit mode, the generated code follows it with a clrldi instruction, which is redundant because cnttzw always produces a value in [0,32] and is therefore already zero extended. This patch handles cnttzw the same way the existing code handles cntlzw.
Repository:
rL LLVM
https://reviews.llvm.org/D25896
Files:
lib/Target/PowerPC/PPCISelDAGToDAG.cpp
test/CodeGen/PowerPC/no-ext-with-count-zeros.ll
Index: test/CodeGen/PowerPC/no-ext-with-count-zeros.ll
===================================================================
--- test/CodeGen/PowerPC/no-ext-with-count-zeros.ll
+++ test/CodeGen/PowerPC/no-ext-with-count-zeros.ll
@@ -0,0 +1,54 @@
+; Function Attrs: nounwind readnone
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-unknown \
+; RUN: -mcpu=pwr9 < %s | FileCheck %s
+
+define signext i32 @ctw(i32 signext %a) {
+entry:
+ %0 = tail call i32 @llvm.cttz.i32(i32 %a, i1 false)
+ ret i32 %0
+; CHECK-LABEL: ctw
+; CHECK: cnttzw 3, 3
+; CHECK-NEXT: blr
+}
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.cttz.i32(i32, i1)
+
+; Function Attrs: nounwind readnone
+define signext i32 @clw(i32 signext %a) {
+entry:
+ %0 = tail call i32 @llvm.ctlz.i32(i32 %a, i1 false)
+ ret i32 %0
+; CHECK-LABEL: clw
+; CHECK: cntlzw 3, 3
+; CHECK-NEXT: blr
+}
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.ctlz.i32(i32, i1)
+
+; Function Attrs: nounwind readnone
+define i64 @ctd(i64 %a) {
+entry:
+ %0 = tail call i64 @llvm.cttz.i64(i64 %a, i1 false)
+ ret i64 %0
+; CHECK-LABEL: ctd
+; CHECK: cnttzd 3, 3
+; CHECK-NEXT: blr
+}
+
+; Function Attrs: nounwind readnone
+declare i64 @llvm.cttz.i64(i64, i1)
+
+; Function Attrs: nounwind readnone
+define i64 @cld(i64 %a) {
+entry:
+ %0 = tail call i64 @llvm.ctlz.i64(i64 %a, i1 false)
+ ret i64 %0
+; CHECK-LABEL: cld
+; CHECK: cntlzd 3, 3
+; CHECK-NEXT: blr
+}
+
+; Function Attrs: nounwind readnone
+declare i64 @llvm.ctlz.i64(i64, i1)
Index: lib/Target/PowerPC/PPCISelDAGToDAG.cpp
===================================================================
--- lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -4024,8 +4024,9 @@
return true;
}
- // CNTLZW always produces a 64-bit value in [0,32], and so is zero extended.
- if (Op32.getMachineOpcode() == PPC::CNTLZW) {
+ // CNT[LT]ZW always produce a 64-bit value in [0,32], and so are zero extended.
+ if (Op32.getMachineOpcode() == PPC::CNTLZW ||
+ Op32.getMachineOpcode() == PPC::CNTTZW) {
ToPromote.insert(Op32.getNode());
return true;
}
@@ -4220,6 +4221,7 @@
case PPC::LHBRX: NewOpcode = PPC::LHBRX8; break;
case PPC::LWBRX: NewOpcode = PPC::LWBRX8; break;
case PPC::CNTLZW: NewOpcode = PPC::CNTLZW8; break;
+ case PPC::CNTTZW: NewOpcode = PPC::CNTTZW8; break;
case PPC::RLWIMI: NewOpcode = PPC::RLWIMI8; break;
case PPC::OR: NewOpcode = PPC::OR8; break;
case PPC::SELECT_I4: NewOpcode = PPC::SELECT_I8; break;
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D25896.75529.patch
Type: text/x-patch
Size: 2585 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20161022/48abd08e/attachment.bin>
More information about the llvm-commits
mailing list