[llvm-commits] CVS: llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp README.txt

Mon May 8 10:38:44 PDT 2006

Changes in directory llvm/lib/Target/PowerPC:

PPCISelDAGToDAG.cpp updated: 1.182 -> 1.183
README.txt updated: 1.87 -> 1.88
---
Log message:

Fold more shifts into bitfield inserts (rlwimi), and update the README


---
Diffs of the changes:  (+34 -20)

 PPCISelDAGToDAG.cpp |   39 +++++++++++++++++++++++----------------
 README.txt          |   15 +++++++++++----
 2 files changed, 34 insertions(+), 20 deletions(-)


Index: llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
diff -u llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp:1.182 llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp:1.183

--- llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp:1.182	Sun May  7 21:52:37 2006
+++ llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp	Mon May  8 12:38:32 2006
@@ -392,25 +392,25 @@
 /// SelectBitfieldInsert - turn an or of two masked values into
 /// the rotate left word immediate then mask insert (rlwimi) instruction.
 SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) {
-  unsigned TgtMask = 0xFFFFFFFF, InsMask = 0xFFFFFFFF, SH = 0;
-  unsigned Value;
-  
   SDOperand Op0 = N->getOperand(0);
   SDOperand Op1 = N->getOperand(1);
   
-  unsigned Op0Opc = Op0.getOpcode();
-  unsigned Op1Opc = Op1.getOpcode();
-  
   uint64_t LKZ, LKO, RKZ, RKO;
-  TLI.ComputeMaskedBits(Op0, TgtMask, LKZ, LKO);
-  TLI.ComputeMaskedBits(Op1, TgtMask, RKZ, RKO);
+  TLI.ComputeMaskedBits(Op0, 0xFFFFFFFFULL, LKZ, LKO);
+  TLI.ComputeMaskedBits(Op1, 0xFFFFFFFFULL, RKZ, RKO);
   
-  if ((LKZ | RKZ) == 0x00000000FFFFFFFFULL) {
-    unsigned PInsMask = ~RKZ;
-    unsigned PTgtMask = ~LKZ;
+  unsigned TargetMask = LKZ;
+  unsigned InsertMask = RKZ;
+  
+  if ((TargetMask | InsertMask) == 0xFFFFFFFF) {
+    unsigned Op0Opc = Op0.getOpcode();
+    unsigned Op1Opc = Op1.getOpcode();
+    unsigned Value, SH = 0;
+    TargetMask = ~TargetMask;
+    InsertMask = ~InsertMask;
 
-    // If the LHS has a foldable shift, then swap it to the RHS so that we can
-    // fold the shift into the insert.
+    // If the LHS has a foldable shift and the RHS does not, then swap it to the
+    // RHS so that we can fold the shift into the insert.
     if (Op0Opc == ISD::AND && Op1Opc == ISD::AND) {
       if (Op0.getOperand(0).getOpcode() == ISD::SHL ||
           Op0.getOperand(0).getOpcode() == ISD::SRL) {
@@ -418,15 +418,22 @@
             Op1.getOperand(0).getOpcode() != ISD::SRL) {
           std::swap(Op0, Op1);
           std::swap(Op0Opc, Op1Opc);
-          std::swap(PInsMask, PTgtMask);
+          std::swap(TargetMask, InsertMask);
         }
       }
+    } else if (Op0Opc == ISD::SHL || Op0Opc == ISD::SRL) {
+      if (Op1Opc == ISD::AND && Op1.getOperand(0).getOpcode() != ISD::SHL &&
+          Op1.getOperand(0).getOpcode() != ISD::SRL) {
+        std::swap(Op0, Op1);
+        std::swap(Op0Opc, Op1Opc);
+        std::swap(TargetMask, InsertMask);
+      }
     }
     
     unsigned MB, ME;
-    if (isRunOfOnes(PInsMask, MB, ME)) {
+    if (isRunOfOnes(InsertMask, MB, ME)) {
       SDOperand Tmp1, Tmp2, Tmp3;
-      bool DisjointMask = (PTgtMask ^ PInsMask) == 0xFFFFFFFF;
+      bool DisjointMask = (TargetMask ^ InsertMask) == 0xFFFFFFFF;
 
       if ((Op1Opc == ISD::SHL || Op1Opc == ISD::SRL) &&
           isIntImmediate(Op1.getOperand(1), Value)) {


Index: llvm/lib/Target/PowerPC/README.txt
diff -u llvm/lib/Target/PowerPC/README.txt:1.87 llvm/lib/Target/PowerPC/README.txt:1.88
--- llvm/lib/Target/PowerPC/README.txt:1.87	Sun May  7 21:52:38 2006
+++ llvm/lib/Target/PowerPC/README.txt	Mon May  8 12:38:32 2006
@@ -516,10 +516,17 @@
         srwi r4, r2, 30
         srwi r5, r2, 31
         or r4, r4, r5
-        slwi r4, r4, 31
-        rlwimi r4, r2, 0, 1, 31
-        stw r4, 0(r3)
+        rlwimi r2, r4, 31, 0, 0
+        stw r2, 0(r3)
         blr
 
-I *think* that could use another rlwimi.
+What this code is really doing is ORing bit 1 into bit 0 (PowerPC numbering, so
+bit 0 is the most significant bit).  We could codegen this as:
 
+_foo:
+        lwz r2, 0(r3)
+        slwi r4, r2, 1
+        rlwinm r4, r4, 0, 0, 0
+        or r2, r2, r4
+        stw r2, 0(r3)
+        blr