[llvm] r186573 - [SystemZ] Use RNSBG

Thu Jul 18 03:40:35 PDT 2013

Author: rsandifo
Date: Thu Jul 18 05:40:35 2013
New Revision: 186573

URL: http://llvm.org/viewvc/llvm-project?rev=186573&view=rev
Log:
[SystemZ] Use RNSBG

This should be the last of the R.SBG patches for now.

Added:
    llvm/trunk/test/CodeGen/SystemZ/rnsbg-01.ll
Modified:
    llvm/trunk/lib/Target/SystemZ/README.txt
    llvm/trunk/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp

Modified: llvm/trunk/lib/Target/SystemZ/README.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/README.txt?rev=186573&r1=186572&r2=186573&view=diff
==============================================================================

--- llvm/trunk/lib/Target/SystemZ/README.txt (original)
+++ llvm/trunk/lib/Target/SystemZ/README.txt Thu Jul 18 05:40:35 2013
@@ -118,11 +118,6 @@ such as ICM and STCM.
 
 --
 
-We could make more use of the ROTATE AND ... SELECTED BITS instructions.
-At the moment we only use RISBG, and only then for subword atomic operations.
-
---
-
 DAGCombiner can detect integer absolute, but there's not yet an associated
 ISD opcode.  We could add one and implement it using LOAD POSITIVE.
 Negated absolutes could use LOAD NEGATIVE.

Modified: llvm/trunk/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp?rev=186573&r1=186572&r2=186573&view=diff
==============================================================================
--- llvm/trunk/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp Thu Jul 18 05:40:35 2013
@@ -97,15 +97,24 @@ static uint64_t allOnes(unsigned int Cou
   return Count == 0 ? 0 : (uint64_t(1) << (Count - 1) << 1) - 1;
 }
 
-// Represents operands 2 to 5 of a ROTATE AND ... SELECTED BITS operation.
-// The operands are: Input (R2), Start (I3), End (I4) and Rotate (I5).
-// The operand value is effectively (and (rotl Input Rotate) Mask) and
-// has BitSize bits.
+// Represents operands 2 to 5 of the ROTATE AND ... SELECTED BITS operation
+// given by Opcode.  The operands are: Input (R2), Start (I3), End (I4) and
+// Rotate (I5).  The combined operand value is effectively:
+//
+//   (or (rotl Input, Rotate), ~Mask)
+//
+// for RNSBG and:
+//
+//   (and (rotl Input, Rotate), Mask)
+//
+// otherwise.  The value has BitSize bits.
 struct RxSBGOperands {
-  RxSBGOperands(SDValue N)
-    : BitSize(N.getValueType().getSizeInBits()), Mask(allOnes(BitSize)),
-      Input(N), Start(64 - BitSize), End(63), Rotate(0) {}
+  RxSBGOperands(unsigned Op, SDValue N)
+    : Opcode(Op), BitSize(N.getValueType().getSizeInBits()),
+      Mask(allOnes(BitSize)), Input(N), Start(64 - BitSize), End(63),
+      Rotate(0) {}
 
+  unsigned Opcode;
   unsigned BitSize;
   uint64_t Mask;
   SDValue Input;
@@ -671,6 +680,9 @@ bool SystemZDAGToDAGISel::expandRxSBG(Rx
   unsigned Opcode = N.getOpcode();
   switch (Opcode) {
   case ISD::AND: {
+    if (RxSBG.Opcode == SystemZ::RNSBG)
+      return false;
+
     ConstantSDNode *MaskNode =
       dyn_cast<ConstantSDNode>(N.getOperand(1).getNode());
     if (!MaskNode)
@@ -692,6 +704,31 @@ bool SystemZDAGToDAGISel::expandRxSBG(Rx
     return true;
   }
 
+  case ISD::OR: {
+    if (RxSBG.Opcode != SystemZ::RNSBG)
+      return false;
+
+    ConstantSDNode *MaskNode =
+      dyn_cast<ConstantSDNode>(N.getOperand(1).getNode());
+    if (!MaskNode)
+      return false;
+
+    SDValue Input = N.getOperand(0);
+    uint64_t Mask = ~MaskNode->getZExtValue();
+    if (!refineRxSBGMask(RxSBG, Mask)) {
+      // If some bits of Input are already known ones, those bits will have
+      // been removed from the mask.  See if adding them back in makes the
+      // mask suitable.
+      APInt KnownZero, KnownOne;
+      CurDAG->ComputeMaskedBits(Input, KnownZero, KnownOne);
+      Mask &= ~KnownOne.getZExtValue();
+      if (!refineRxSBGMask(RxSBG, Mask))
+        return false;
+    }
+    RxSBG.Input = Input;
+    return true;
+  }
+
   case ISD::ROTL: {
     // Any 64-bit rotate left can be merged into the RxSBG.
     if (RxSBG.BitSize != 64)
@@ -707,18 +744,26 @@ bool SystemZDAGToDAGISel::expandRxSBG(Rx
   }
       
   case ISD::SHL: {
-    // Treat (shl X, count) as (and (rotl X, count), ~0<<count).
     ConstantSDNode *CountNode =
       dyn_cast<ConstantSDNode>(N.getOperand(1).getNode());
     if (!CountNode)
       return false;
 
     uint64_t Count = CountNode->getZExtValue();
-    if (Count < 1 ||
-        Count >= RxSBG.BitSize ||
-        !refineRxSBGMask(RxSBG, allOnes(RxSBG.BitSize - Count) << Count))
+    if (Count < 1 || Count >= RxSBG.BitSize)
       return false;
 
+    if (RxSBG.Opcode == SystemZ::RNSBG) {
+      // Treat (shl X, count) as (rotl X, count) as long as the bottom
+      // count bits from RxSBG.Input are ignored.
+      if (shiftedInBitsMatter(RxSBG, Count, true))
+        return false;
+    } else {
+      // Treat (shl X, count) as (and (rotl X, count), ~0<<count).
+      if (!refineRxSBGMask(RxSBG, allOnes(RxSBG.BitSize - Count) << Count))
+        return false;
+    }
+
     RxSBG.Rotate = (RxSBG.Rotate + Count) & 63;
     RxSBG.Input = N.getOperand(0);
     return true;
@@ -735,9 +780,9 @@ bool SystemZDAGToDAGISel::expandRxSBG(Rx
     if (Count < 1 || Count >= RxSBG.BitSize)
       return false;
 
-    if (Opcode == ISD::SRA) {
-      // Treat (sra X, count) as (rotl X, size-count) as long as the top
-      // Count bits from RxSBG.Input are ignored.
+    if (RxSBG.Opcode == SystemZ::RNSBG || Opcode == ISD::SRA) {
+      // Treat (srl|sra X, count) as (rotl X, size-count) as long as the top
+      // count bits from RxSBG.Input are ignored.
       if (shiftedInBitsMatter(RxSBG, Count, false))
         return false;
     } else {
@@ -779,7 +824,7 @@ SDValue SystemZDAGToDAGISel::convertTo(S
 }
 
 SDNode *SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) {
-  RxSBGOperands RISBG(SDValue(N, 0));
+  RxSBGOperands RISBG(SystemZ::RISBG, SDValue(N, 0));
   unsigned Count = 0;
   while (expandRxSBG(RISBG))
     Count += 1;
@@ -811,7 +856,10 @@ SDNode *SystemZDAGToDAGISel::tryRISBGZer
 SDNode *SystemZDAGToDAGISel::tryRxSBG(SDNode *N, unsigned Opcode) {
   // Try treating each operand of N as the second operand of the RxSBG
   // and see which goes deepest.
-  RxSBGOperands RxSBG[] = { N->getOperand(0), N->getOperand(1) };
+  RxSBGOperands RxSBG[] = {
+    RxSBGOperands(Opcode, N->getOperand(0)),
+    RxSBGOperands(Opcode, N->getOperand(1))
+  };
   unsigned Count[] = { 0, 0 };
   for (unsigned I = 0; I < 2; ++I)
     while (expandRxSBG(RxSBG[I]))
@@ -941,6 +989,9 @@ SDNode *SystemZDAGToDAGISel::Select(SDNo
     break;
 
   case ISD::AND:
+    if (Node->getOperand(1).getOpcode() != ISD::Constant)
+      ResNode = tryRxSBG(Node, SystemZ::RNSBG);
+    // Fall through.
   case ISD::ROTL:
   case ISD::SHL:
   case ISD::SRL:

Added: llvm/trunk/test/CodeGen/SystemZ/rnsbg-01.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SystemZ/rnsbg-01.ll?rev=186573&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/SystemZ/rnsbg-01.ll (added)
+++ llvm/trunk/test/CodeGen/SystemZ/rnsbg-01.ll Thu Jul 18 05:40:35 2013
@@ -0,0 +1,257 @@
+; Test sequences that can use RNSBG.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Test a simple mask, which is a wrap-around case.
+define i32 @f1(i32 %a, i32 %b) {
+; CHECK-LABEL: f1:
+; CHECK: rnsbg %r2, %r3, 59, 56, 0
+; CHECK: br %r14
+  %orb = or i32 %b, 96
+  %and = and i32 %a, %orb
+  ret i32 %and
+}
+
+; ...and again with i64.
+define i64 @f2(i64 %a, i64 %b) {
+; CHECK-LABEL: f2:
+; CHECK: rnsbg %r2, %r3, 59, 56, 0
+; CHECK: br %r14
+  %orb = or i64 %b, 96
+  %and = and i64 %a, %orb
+  ret i64 %and
+}
+
+; Test a case where no wraparound is needed.
+define i32 @f3(i32 %a, i32 %b) {
+; CHECK-LABEL: f3:
+; CHECK: rnsbg %r2, %r3, 58, 61, 0
+; CHECK: br %r14
+  %orb = or i32 %b, -61
+  %and = and i32 %a, %orb
+  ret i32 %and
+}
+
+; ...and again with i64.
+define i64 @f4(i64 %a, i64 %b) {
+; CHECK-LABEL: f4:
+; CHECK: rnsbg %r2, %r3, 58, 61, 0
+; CHECK: br %r14
+  %orb = or i64 %b, -61
+  %and = and i64 %a, %orb
+  ret i64 %and
+}
+
+; Test a case with just a left shift.  This can't use RNSBG.
+define i32 @f6(i32 %a, i32 %b) {
+; CHECK-LABEL: f6:
+; CHECK: sll {{%r[0-5]}}
+; CHECK: nr {{%r[0-5]}}
+; CHECK: br %r14
+  %shrb = shl i32 %b, 20
+  %and = and i32 %a, %shrb
+  ret i32 %and
+}
+
+; ...and again with i64.
+define i64 @f7(i64 %a, i64 %b) {
+; CHECK-LABEL: f7:
+; CHECK: sllg {{%r[0-5]}}
+; CHECK: ngr {{%r[0-5]}}
+; CHECK: br %r14
+  %shrb = shl i64 %b, 20
+  %and = and i64 %a, %shrb
+  ret i64 %and
+}
+
+; Test a case with just a rotate.  This can't use RNSBG.
+define i32 @f8(i32 %a, i32 %b) {
+; CHECK-LABEL: f8:
+; CHECK: rll {{%r[0-5]}}
+; CHECK: nr {{%r[0-5]}}
+; CHECK: br %r14
+  %shlb = shl i32 %b, 22
+  %shrb = lshr i32 %b, 10
+  %rotlb = or i32 %shlb, %shrb
+  %and = and i32 %a, %rotlb
+  ret i32 %and
+}
+
+; ...and again with i64, which can.
+define i64 @f9(i64 %a, i64 %b) {
+; CHECK-LABEL: f9:
+; CHECK: rnsbg %r2, %r3, 0, 63, 44
+; CHECK: br %r14
+  %shlb = shl i64 %b, 44
+  %shrb = lshr i64 %b, 20
+  %rotlb = or i64 %shlb, %shrb
+  %and = and i64 %a, %rotlb
+  ret i64 %and
+}
+
+; Test a case with a left shift and OR, where the OR covers all shifted bits.
+; We can do the whole thing using RNSBG.
+define i32 @f10(i32 %a, i32 %b) {
+; CHECK-LABEL: f10:
+; CHECK: rnsbg %r2, %r3, 32, 56, 7
+; CHECK: br %r14
+  %shlb = shl i32 %b, 7
+  %orb = or i32 %shlb, 127
+  %and = and i32 %a, %orb
+  ret i32 %and
+}
+
+; ...and again with i64.
+define i64 @f11(i64 %a, i64 %b) {
+; CHECK-LABEL: f11:
+; CHECK: rnsbg %r2, %r3, 0, 56, 7
+; CHECK: br %r14
+  %shlb = shl i64 %b, 7
+  %orb = or i64 %shlb, 127
+  %and = and i64 %a, %orb
+  ret i64 %and
+}
+
+; Test a case with a left shift and OR, where the OR doesn't cover all
+; shifted bits.  We can't use RNSBG for the shift, but we can for the OR
+; and AND.
+define i32 @f12(i32 %a, i32 %b) {
+; CHECK-LABEL: f12:
+; CHECK: sll %r3, 7
+; CHECK: rnsbg %r2, %r3, 32, 57, 0
+; CHECK: br %r14
+  %shlb = shl i32 %b, 7
+  %orb = or i32 %shlb, 63
+  %and = and i32 %a, %orb
+  ret i32 %and
+}
+
+; ...and again with i64.
+define i64 @f13(i64 %a, i64 %b) {
+; CHECK-LABEL: f13:
+; CHECK: sllg [[REG:%r[01345]]], %r3, 7
+; CHECK: rnsbg %r2, [[REG]], 0, 57, 0
+; CHECK: br %r14
+  %shlb = shl i64 %b, 7
+  %orb = or i64 %shlb, 63
+  %and = and i64 %a, %orb
+  ret i64 %and
+}
+
+; Test a case with a right shift and OR, where the OR covers all the shifted
+; bits.  The whole thing can be done using RNSBG.
+define i32 @f14(i32 %a, i32 %b) {
+; CHECK-LABEL: f14:
+; CHECK: rnsbg %r2, %r3, 60, 63, 37
+; CHECK: br %r14
+  %shrb = lshr i32 %b, 27
+  %orb = or i32 %shrb, -16
+  %and = and i32 %a, %orb
+  ret i32 %and
+}
+
+; ...and again with i64.
+define i64 @f15(i64 %a, i64 %b) {
+; CHECK-LABEL: f15:
+; CHECK: rnsbg %r2, %r3, 60, 63, 5
+; CHECK: br %r14
+  %shrb = lshr i64 %b, 59
+  %orb = or i64 %shrb, -16
+  %and = and i64 %a, %orb
+  ret i64 %and
+}
+
+; Test a case with a right shift and OR, where the OR doesn't cover all the
+; shifted bits.  The shift needs to be done separately, but the OR and AND
+; can use RNSBG.
+define i32 @f16(i32 %a, i32 %b) {
+; CHECK-LABEL: f16:
+; CHECK: srl %r3, 29
+; CHECK: rnsbg %r2, %r3, 60, 63, 0
+; CHECK: br %r14
+  %shrb = lshr i32 %b, 29
+  %orb = or i32 %shrb, -16
+  %and = and i32 %a, %orb
+  ret i32 %and
+}
+
+; ...and again with i64.
+define i64 @f17(i64 %a, i64 %b) {
+; CHECK-LABEL: f17:
+; CHECK: srlg [[REG:%r[01345]]], %r3, 61
+; CHECK: rnsbg %r2, [[REG]], 60, 63, 0
+; CHECK: br %r14
+  %shrb = lshr i64 %b, 61
+  %orb = or i64 %shrb, -16
+  %and = and i64 %a, %orb
+  ret i64 %and
+}
+
+; Test a combination involving an ASHR in which the sign bits matter.
+; We can't use RNSBG for the ASHR in that case, but we can for the rest.
+define i32 @f18(i32 %a, i32 %b, i32 *%dest) {
+; CHECK-LABEL: f18:
+; CHECK: sra %r3, 4
+; CHECK: rnsbg %r2, %r3, 32, 62, 1
+; CHECK: br %r14
+  %ashrb = ashr i32 %b, 4
+  store i32 %ashrb, i32 *%dest
+  %shlb = shl i32 %ashrb, 1
+  %orb = or i32 %shlb, 1
+  %and = and i32 %a, %orb
+  ret i32 %and
+}
+
+; ...and again with i64.
+define i64 @f19(i64 %a, i64 %b, i64 *%dest) {
+; CHECK-LABEL: f19:
+; CHECK: srag [[REG:%r[0145]]], %r3, 34
+; CHECK: rnsbg %r2, [[REG]], 0, 62, 1
+; CHECK: br %r14
+  %ashrb = ashr i64 %b, 34
+  store i64 %ashrb, i64 *%dest
+  %shlb = shl i64 %ashrb, 1
+  %orb = or i64 %shlb, 1
+  %and = and i64 %a, %orb
+  ret i64 %and
+}
+
+; Test a combination involving an ASHR in which the sign bits don't matter.
+define i32 @f20(i32 %a, i32 %b, i32 *%dest) {
+; CHECK-LABEL: f20:
+; CHECK: rnsbg %r2, %r3, 48, 62, 48
+; CHECK: br %r14
+  %ashrb = ashr i32 %b, 17
+  store i32 %ashrb, i32 *%dest
+  %shlb = shl i32 %ashrb, 1
+  %orb = or i32 %shlb, -65535
+  %and = and i32 %a, %orb
+  ret i32 %and
+}
+
+; ...and again with i64.
+define i64 @f21(i64 %a, i64 %b, i64 *%dest) {
+; CHECK-LABEL: f21:
+; CHECK: rnsbg %r2, %r3, 48, 62, 16
+; CHECK: br %r14
+  %ashrb = ashr i64 %b, 49
+  store i64 %ashrb, i64 *%dest
+  %shlb = shl i64 %ashrb, 1
+  %orb = or i64 %shlb, -65535
+  %and = and i64 %a, %orb
+  ret i64 %and
+}
+
+; Test a case with a shift, OR, and rotate where the OR covers all shifted bits.
+define i64 @f22(i64 %a, i64 %b) {
+; CHECK-LABEL: f22:
+; CHECK: rnsbg %r2, %r3, 60, 54, 9
+; CHECK: br %r14
+  %shlb = shl i64 %b, 5
+  %orb = or i64 %shlb, 31
+  %shlorb = shl i64 %orb, 4
+  %shrorb = lshr i64 %orb, 60
+  %rotlorb = or i64 %shlorb, %shrorb
+  %and = and i64 %a, %rotlorb
+  ret i64 %and
+}