[llvm-commits] CVS: llvm/lib/Target/PowerPC/PPC32ISelPattern.cpp

Nate Begeman natebegeman at mac.com
Tue Jun 7 21:14:55 PDT 2005



Changes in directory llvm/lib/Target/PowerPC:

PPC32ISelPattern.cpp updated: 1.91 -> 1.92
---
Log message:

Handle some more real-world cases of rlwimi.  These don't come up very
often in "normal" code, but for things like software graphics, they
make a big difference.

For the following code:
// Blends two packed pixels, each holding three 5-bit fields at bits 0-4,
// 5-9 and 10-14, using weights alpha and (32 - alpha), and returns the
// re-packed result truncated to unsigned short.
// NOTE(review): the layout looks like a 15-bit 5:5:5 colour format, and alpha
// is presumably expected to be in 0..32 (32 - alpha wraps otherwise, since it
// is unsigned) -- confirm against the caller.
unsigned short Trans16Bit(unsigned srcA,unsigned srcB,unsigned alpha)
{
	unsigned tmpA,tmpB,mixed;
	// Move the middle field (mask 0x03E0, bits 5-9) up to bits 20-24 and keep
	// the outer two fields (mask 0x7C1F) in place, so the three fields sit at
	// bits 0, 10 and 20 with room between them for the multiply below.
	tmpA = ((srcA & 0x03E0) << 15) | (srcA & 0x7C1F);
	tmpB = ((srcB & 0x03E0) << 15) | (srcB & 0x7C1F);
	// Weighted sum of all three fields at once; the two weights add up to 32.
	mixed = (tmpA * alpha) + (tmpB * (32 - alpha));
	// Shift right by 5 (divide by 32) and mask the outer fields back to bits
	// 0-4 and 10-14; shift right by 20 (undo the earlier << 15, plus the
	// divide by 32) and mask the middle field back to bits 5-9.
	return ((mixed >> 5) & 0x7C1F) | ((mixed >> 20) & 0x03E0);
}

We now generate:
_Trans16Bit:
.LBB_Trans16Bit_0:      ; entry
        andi. r2, r4, 31775
        rlwimi r2, r4, 15, 7, 11
        subfic r4, r5, 32
        mullw r2, r2, r4
        andi. r4, r3, 31775
        rlwimi r4, r3, 15, 7, 11
        mullw r3, r4, r5
        add r2, r2, r3
        srwi r3, r2, 5
        andi. r3, r3, 31775
        rlwimi r3, r2, 12, 22, 26
        blr

Instead of:
_Trans16Bit:
.LBB_Trans16Bit_0:      ; entry
        slwi r2, r4, 15
        rlwinm r2, r2, 0, 7, 11
        andi. r4, r4, 31775
        or r2, r2, r4
        subfic r4, r5, 32
        mullw r2, r2, r4
        slwi r4, r3, 15
        rlwinm r4, r4, 0, 7, 11
        andi. r3, r3, 31775
        or r3, r4, r3
        mullw r3, r3, r5
        add r2, r2, r3
        srwi r3, r2, 5
        andi. r3, r3, 31775
        srwi r2, r2, 20
        rlwimi r3, r2, 0, 22, 26
        blr



---
Diffs of the changes:  (+42 -10)

 PPC32ISelPattern.cpp |   52 +++++++++++++++++++++++++++++++++++++++++----------
 1 files changed, 42 insertions(+), 10 deletions(-)


Index: llvm/lib/Target/PowerPC/PPC32ISelPattern.cpp
diff -u llvm/lib/Target/PowerPC/PPC32ISelPattern.cpp:1.91 llvm/lib/Target/PowerPC/PPC32ISelPattern.cpp:1.92
--- llvm/lib/Target/PowerPC/PPC32ISelPattern.cpp:1.91	Sun May 15 14:54:37 2005
+++ llvm/lib/Target/PowerPC/PPC32ISelPattern.cpp	Tue Jun  7 23:14:27 2005
@@ -982,8 +982,12 @@
 bool ISel::SelectBitfieldInsert(SDOperand OR, unsigned Result) {
   bool IsRotate = false;
   unsigned TgtMask = 0xFFFFFFFF, InsMask = 0xFFFFFFFF, Amount = 0;
-  unsigned Op0Opc = OR.getOperand(0).getOpcode();
-  unsigned Op1Opc = OR.getOperand(1).getOpcode();
+  
+  SDOperand Op0 = OR.getOperand(0);
+  SDOperand Op1 = OR.getOperand(1);
+
+  unsigned Op0Opc = Op0.getOpcode();
+  unsigned Op1Opc = Op1.getOpcode();
 
   // Verify that we have the correct opcodes
   if (ISD::SHL != Op0Opc && ISD::SRL != Op0Opc && ISD::AND != Op0Opc)
@@ -993,7 +997,7 @@
 
   // Generate Mask value for Target
   if (ConstantSDNode *CN =
-      dyn_cast<ConstantSDNode>(OR.getOperand(0).getOperand(1).Val)) {
+      dyn_cast<ConstantSDNode>(Op0.getOperand(1).Val)) {
     switch(Op0Opc) {
     case ISD::SHL: TgtMask <<= (unsigned)CN->getValue(); break;
     case ISD::SRL: TgtMask >>= (unsigned)CN->getValue(); break;
@@ -1005,7 +1009,7 @@
 
   // Generate Mask value for Insert
   if (ConstantSDNode *CN =
-      dyn_cast<ConstantSDNode>(OR.getOperand(1).getOperand(1).Val)) {
+      dyn_cast<ConstantSDNode>(Op1.getOperand(1).Val)) {
     switch(Op1Opc) {
     case ISD::SHL:
       Amount = CN->getValue();
@@ -1026,27 +1030,55 @@
     return false;
   }
 
+  unsigned Tmp3 = 0;
+
+  // If both of the inputs are ANDs and one of them has a logical shift by
+  // constant as its input, make that the inserted value so that we can combine
+  // the shift into the rotate part of the rlwimi instruction
+  if (Op0Opc == ISD::AND && Op1Opc == ISD::AND) {
+    if (Op1.getOperand(0).getOpcode() == ISD::SHL || 
+        Op1.getOperand(0).getOpcode() == ISD::SRL) {
+      if (ConstantSDNode *CN = 
+          dyn_cast<ConstantSDNode>(Op1.getOperand(0).getOperand(1).Val)) {
+        Amount = Op1.getOperand(0).getOpcode() == ISD::SHL ? 
+          CN->getValue() : 32 - CN->getValue();
+        Tmp3 = SelectExpr(Op1.getOperand(0).getOperand(0));
+      }
+    } else if (Op0.getOperand(0).getOpcode() == ISD::SHL ||
+               Op0.getOperand(0).getOpcode() == ISD::SRL) {
+      if (ConstantSDNode *CN = 
+          dyn_cast<ConstantSDNode>(Op0.getOperand(0).getOperand(1).Val)) {
+        std::swap(Op0, Op1);
+        std::swap(TgtMask, InsMask);
+        Amount = Op1.getOperand(0).getOpcode() == ISD::SHL ? 
+          CN->getValue() : 32 - CN->getValue();
+        Tmp3 = SelectExpr(Op1.getOperand(0).getOperand(0));
+      }
+    }
+  }
+
   // Verify that the Target mask and Insert mask together form a full word mask
   // and that the Insert mask is a run of set bits (which implies both are runs
   // of set bits).  Given that, Select the arguments and generate the rlwimi
   // instruction.
   unsigned MB, ME;
-  if (((TgtMask ^ InsMask) == 0xFFFFFFFF) && IsRunOfOnes(InsMask, MB, ME)) {
+  if (((TgtMask & InsMask) == 0) && IsRunOfOnes(InsMask, MB, ME)) {
     unsigned Tmp1, Tmp2;
+    bool fullMask = (TgtMask ^ InsMask) == 0xFFFFFFFF;
     // Check for rotlwi / rotrwi here, a special case of bitfield insert
     // where both bitfield halves are sourced from the same value.
-    if (IsRotate &&
+    if (IsRotate && fullMask &&
         OR.getOperand(0).getOperand(0) == OR.getOperand(1).getOperand(0)) {
       Tmp1 = SelectExpr(OR.getOperand(0).getOperand(0));
       BuildMI(BB, PPC::RLWINM, 4, Result).addReg(Tmp1).addImm(Amount)
         .addImm(0).addImm(31);
       return true;
     }
-    if (Op0Opc == ISD::AND)
-      Tmp1 = SelectExpr(OR.getOperand(0).getOperand(0));
+    if (Op0Opc == ISD::AND && fullMask)
+      Tmp1 = SelectExpr(Op0.getOperand(0));
     else
-      Tmp1 = SelectExpr(OR.getOperand(0));
-    Tmp2 = SelectExpr(OR.getOperand(1).getOperand(0));
+      Tmp1 = SelectExpr(Op0);
+    Tmp2 = Tmp3 ? Tmp3 : SelectExpr(Op1.getOperand(0));
     BuildMI(BB, PPC::RLWIMI, 5, Result).addReg(Tmp1).addReg(Tmp2)
       .addImm(Amount).addImm(MB).addImm(ME);
     return true;






More information about the llvm-commits mailing list