[llvm-commits] CVS: llvm/lib/Target/PowerPC/PPCISelLowering.cpp README_ALTIVEC.txt

Wed Apr 12 12:07:26 PDT 2006

Changes in directory llvm/lib/Target/PowerPC:

PPCISelLowering.cpp updated: 1.146 -> 1.147
README_ALTIVEC.txt updated: 1.20 -> 1.21
---
Log message:

Add a new way to match vector constants, which makes it easier to bang bits of
different types.

Codegen spltw(0x7FFFFFFF) and spltw(0x80000000) without a constant pool load,
implementing PowerPC/vec_constants.ll:test1.  This compiles:

typedef float vf __attribute__ ((vector_size (16)));
typedef int vi __attribute__ ((vector_size (16)));
void test(vi *P1, vi *P2, vf *P3) {
  *P1 &= (vi){0x80000000,0x80000000,0x80000000,0x80000000};
  *P2 &= (vi){0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF};
  *P3 = vec_abs((vector float)*P3);
}

to:

_test:
        mfspr r2, 256
        oris r6, r2, 49152
        mtspr 256, r6
        vspltisw v0, -1
        vslw v0, v0, v0
        lvx v1, 0, r3
        vand v1, v1, v0
        stvx v1, 0, r3
        lvx v1, 0, r4
        vandc v1, v1, v0
        stvx v1, 0, r4
        lvx v1, 0, r5
        vandc v0, v1, v0
        stvx v0, 0, r5
        mtspr 256, r2
        blr

instead of (with two constant pool entries):

_test:
        mfspr r2, 256
        oris r6, r2, 49152
        mtspr 256, r6
        li r6, lo16(LCPI1_0)
        lis r7, ha16(LCPI1_0)
        li r8, lo16(LCPI1_1)
        lis r9, ha16(LCPI1_1)
        lvx v0, r7, r6
        lvx v1, 0, r3
        vand v0, v1, v0
        stvx v0, 0, r3
        lvx v0, r9, r8
        lvx v1, 0, r4
        vand v1, v1, v0
        stvx v1, 0, r4
        lvx v1, 0, r5
        vand v0, v1, v0
        stvx v0, 0, r5
        mtspr 256, r2
        blr

GCC produces (with 2 cp entries):

_test:
        mfspr r0,256
        stw r0,-4(r1)
        oris r0,r0,0xc00c
        mtspr 256,r0
        lis r2,ha16(LC0)
        lis r9,ha16(LC1)
        la r2,lo16(LC0)(r2)
        lvx v0,0,r3
        lvx v1,0,r5
        la r9,lo16(LC1)(r9)
        lwz r12,-4(r1)
        lvx v12,0,r2
        lvx v13,0,r9
        vand v0,v0,v12
        stvx v0,0,r3
        vspltisw v0,-1
        vslw v12,v0,v0
        vandc v1,v1,v12
        stvx v1,0,r5
        lvx v0,0,r4
        vand v0,v0,v13
        stvx v0,0,r4
        mtspr 256,r12
        blr



---
Diffs of the changes:  (+91 -7)

 PPCISelLowering.cpp |   89 +++++++++++++++++++++++++++++++++++++++++++++++++---
 README_ALTIVEC.txt  |    9 +++--
 2 files changed, 91 insertions(+), 7 deletions(-)


Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp
diff -u llvm/lib/Target/PowerPC/PPCISelLowering.cpp:1.146 llvm/lib/Target/PowerPC/PPCISelLowering.cpp:1.147

--- llvm/lib/Target/PowerPC/PPCISelLowering.cpp:1.146	Wed Apr 12 12:37:20 2006
+++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp	Wed Apr 12 14:07:14 2006
@@ -544,6 +544,48 @@
   return SDOperand();
 }
 
+// If this is a vector of constants or undefs, get the bits.  A bit in
+// UndefBits is set if the corresponding element of the vector is an 
+// ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
+// zero.   Return true if this is not an array of constants, false if it is.
+//
+// Note that VectorBits/UndefBits are returned in 'little endian' form, so
+// elements 0,1 go in VectorBits[0] and 2,3 go in VectorBits[1] for a v4i32.
+static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
+                                       uint64_t UndefBits[2]) {
+  // Start with zero'd results.
+  VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
+  
+  unsigned EltBitSize = MVT::getSizeInBits(BV->getOperand(0).getValueType());
+  for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
+    SDOperand OpVal = BV->getOperand(i);
+    
+    unsigned PartNo = i >= e/2;     // In the upper 128 bits?
+    unsigned SlotNo = i & (e/2-1);  // Which subpiece of the uint64_t it is.
+
+    uint64_t EltBits = 0;
+    if (OpVal.getOpcode() == ISD::UNDEF) {
+      uint64_t EltUndefBits = ~0U >> (32-EltBitSize);
+      UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
+      continue;
+    } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
+      EltBits = CN->getValue() & (~0U >> (32-EltBitSize));
+    } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
+      assert(CN->getValueType(0) == MVT::f32 &&
+             "Only one legal FP vector type!");
+      EltBits = FloatToBits(CN->getValue());
+    } else {
+      // Nonconstant element.
+      return true;
+    }
+    
+    VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
+  }
+  
+  //printf("%llx %llx  %llx %llx\n", 
+  //       VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
+  return false;
+}
 
 /// LowerOperation - Provide custom lowering hooks for some operations.
 ///
@@ -922,12 +964,20 @@
   }
   case ISD::BUILD_VECTOR: {
     // If this is a case we can't handle, return null and let the default
-    // expansion code take care of it.  If we CAN select this case, return Op.
-
-    // FIXME: We should handle splat(-0.0), and other cases here.
+    // expansion code take care of it.  If we CAN select this case, return Op
+    // or something simpler.
+    
+    // If this is a vector of constants or undefs, get the bits.  A bit in
+    // UndefBits is set if the corresponding element of the vector is an 
+    // ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
+    // zero. 
+    uint64_t VectorBits[2];
+    uint64_t UndefBits[2];
+    if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits))
+      return SDOperand();   // Not a constant vector.
 
     // See if this is all zeros.
-    if (ISD::isBuildVectorAllZeros(Op.Val)) {
+    if ((VectorBits[0] | VectorBits[1]) == 0) {
       // Canonicalize all zero vectors to be v4i32.
       if (Op.getValueType() != MVT::v4i32) {
         SDOperand Z = DAG.getConstant(0, MVT::i32);
@@ -962,6 +1012,37 @@
       }
       return Op;
     }
+
+    // If this is some other splat of 4-byte elements, see if we can handle it
+    // in another way.
+    // FIXME: Make this more undef happy and work with other widths (1,2 bytes).
+    if (VectorBits[0] == VectorBits[1] &&
+        unsigned(VectorBits[0]) == unsigned(VectorBits[0] >> 32)) {
+      unsigned Bits = unsigned(VectorBits[0]);
+
+      // If this is 0x8000_0000 x 4, turn into vspltisw + vslw.  If it is 
+      // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000).  These are important
+      // for fneg/fabs.
+      if (Bits == 0x80000000 || Bits == 0x7FFFFFFF) {
+        // Make -1 and vspltisw -1:
+        SDOperand OnesI = DAG.getConstant(~0U, MVT::i32);
+        SDOperand OnesV = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
+                                      OnesI, OnesI, OnesI, OnesI);
+        
+        // Make the VSLW intrinsic, computing 0x8000_0000.
+        SDOperand Res
+          = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, MVT::v4i32,
+                        DAG.getConstant(Intrinsic::ppc_altivec_vslw, MVT::i32),
+                        OnesV, OnesV);
+        
+        // If this is 0x7FFF_FFFF, xor by OnesV to invert it.
+        if (Bits == 0x7FFFFFFF)
+          Res = DAG.getNode(ISD::XOR, MVT::v4i32, Res, OnesV);
+        
+        return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res);
+      }
+    }
+    
       
     return SDOperand();
   }


Index: llvm/lib/Target/PowerPC/README_ALTIVEC.txt
diff -u llvm/lib/Target/PowerPC/README_ALTIVEC.txt:1.20 llvm/lib/Target/PowerPC/README_ALTIVEC.txt:1.21
--- llvm/lib/Target/PowerPC/README_ALTIVEC.txt:1.20	Tue Apr 11 13:47:03 2006
+++ llvm/lib/Target/PowerPC/README_ALTIVEC.txt	Wed Apr 12 14:07:14 2006
@@ -43,7 +43,8 @@
 altivec instructions.  Examples
  GCC does: "t=vsplti*, r = t+t"  for constants it can't generate with one vsplti
 
- -0.0 (sign bit):  vspltisw v0,-1 / vslw v0,v0,v0
+This should be added to the ISD::BUILD_VECTOR case in 
+PPCTargetLowering::LowerOperation.
 
 //===----------------------------------------------------------------------===//
 
@@ -110,8 +111,10 @@
 
 This would fix two problems:
 1. Writing patterns multiple times.
-2. Identical operations in different types are not getting CSE'd (e.g. 
-   { 0U, 0U, 0U, 0U } and {0.0, 0.0, 0.0, 0.0}.
+2. Identical operations in different types are not getting CSE'd.
+
+We already do this for shuffle and build_vector.  We need load,undef,and,or,xor,
+etc.
 
 //===----------------------------------------------------------------------===//