[llvm] r225192 - [PowerPC] Remove zexts after i32 ctlz

Hal Finkel hfinkel at anl.gov
Mon Jan 5 10:52:30 PST 2015


Author: hfinkel
Date: Mon Jan  5 12:52:29 2015
New Revision: 225192

URL: http://llvm.org/viewvc/llvm-project?rev=225192&view=rev
Log:
[PowerPC] Remove zexts after i32 ctlz

The 64-bit semantics of cntlzw are not special: the 32-bit leading-zero count is
stored as a 64-bit value in the range [0,32]. As a result, it is always zero
extended, and it can be added to the PPCISelDAGToDAG peephole optimization as a
frontier instruction for the removal of unnecessary zero extensions.

Modified:
    llvm/trunk/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
    llvm/trunk/lib/Target/PowerPC/PPCInstr64Bit.td
    llvm/trunk/test/CodeGen/PowerPC/rm-zext.ll

Modified: llvm/trunk/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelDAGToDAG.cpp?rev=225192&r1=225191&r2=225192&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCISelDAGToDAG.cpp Mon Jan  5 12:52:29 2015
@@ -3736,6 +3736,12 @@ static bool PeepholePPC64ZExtGather(SDVa
     return true;
   }
 
+  // CNTLZW always produces a 64-bit value in [0,32], and so is zero extended.
+  if (Op32.getMachineOpcode() == PPC::CNTLZW) {
+    ToPromote.insert(Op32.getNode());
+    return true;
+  }
+
   // Next, check for those instructions we can look through.
 
   // Assuming the mask does not wrap around, then the higher-order bits are
@@ -3925,6 +3931,7 @@ void PPCDAGToDAGISel::PeepholePPC64ZExt(
       case PPC::LIS:       NewOpcode = PPC::LIS8; break;
       case PPC::LHBRX:     NewOpcode = PPC::LHBRX8; break;
       case PPC::LWBRX:     NewOpcode = PPC::LWBRX8; break;
+      case PPC::CNTLZW:    NewOpcode = PPC::CNTLZW8; break;
       case PPC::RLWIMI:    NewOpcode = PPC::RLWIMI8; break;
       case PPC::OR:        NewOpcode = PPC::OR8; break;
       case PPC::SELECT_I4: NewOpcode = PPC::SELECT_I8; break;

Modified: llvm/trunk/lib/Target/PowerPC/PPCInstr64Bit.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCInstr64Bit.td?rev=225192&r1=225191&r2=225192&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCInstr64Bit.td (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCInstr64Bit.td Mon Jan  5 12:52:29 2015
@@ -551,7 +551,10 @@ defm SRAD : XForm_6rc<31, 794, (outs g8r
                       "srad", "$rA, $rS, $rB", IIC_IntRotateD,
                       [(set i64:$rA, (PPCsra i64:$rS, i32:$rB))]>, isPPC64;
 
-let Interpretation64Bit = 1, isCodeGenOnly = 1 in { 
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
+defm CNTLZW8 : XForm_11r<31,  26, (outs g8rc:$rA), (ins g8rc:$rS),
+                        "cntlzw", "$rA, $rS", IIC_IntGeneral, []>;
+
 defm EXTSB8 : XForm_11r<31, 954, (outs g8rc:$rA), (ins g8rc:$rS),
                         "extsb", "$rA, $rS", IIC_IntSimple,
                         [(set i64:$rA, (sext_inreg i64:$rS, i8))]>;

Modified: llvm/trunk/test/CodeGen/PowerPC/rm-zext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/rm-zext.ll?rev=225192&r1=225191&r2=225192&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/rm-zext.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/rm-zext.ll Mon Jan  5 12:52:29 2015
@@ -40,10 +40,10 @@ entry:
 }
 
 ; Function Attrs: nounwind readnone
-declare i32 @llvm.bswap.i32(i32) #1
+declare i32 @llvm.bswap.i32(i32) #0
 
 ; Function Attrs: nounwind readonly
-define zeroext i32 @bs32(i32* nocapture readonly %x) #0 {
+define zeroext i32 @bs32(i32* nocapture readonly %x) #1 {
 entry:
   %0 = load i32* %x, align 4
   %1 = tail call i32 @llvm.bswap.i32(i32 %0)
@@ -55,7 +55,7 @@ entry:
 }
 
 ; Function Attrs: nounwind readonly
-define zeroext i16 @bs16(i16* nocapture readonly %x) #0 {
+define zeroext i16 @bs16(i16* nocapture readonly %x) #1 {
 entry:
   %0 = load i16* %x, align 2
   %1 = tail call i16 @llvm.bswap.i16(i16 %0)
@@ -67,7 +67,23 @@ entry:
 }
 
 ; Function Attrs: nounwind readnone
-declare i16 @llvm.bswap.i16(i16) #1
+declare i16 @llvm.bswap.i16(i16) #0
+
+; Function Attrs: nounwind readnone
+define zeroext i32 @ctlz32(i32 zeroext %x) #0 {
+entry:
+  %0 = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false)
+  ret i32 %0
+
+; CHECK-LABEL: @ctlz32
+; CHECK-NOT: rldicl 3, {{[0-9]+}}, 0, 32
+; CHECK: blr
+}
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.ctlz.i32(i32, i1) #0
+
 
 attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind readonly }
 





More information about the llvm-commits mailing list