[llvm] r296537 - [Hexagon] Generate extract instructions more aggressively

Krzysztof Parzyszek via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 28 15:27:34 PST 2017


Author: kparzysz
Date: Tue Feb 28 17:27:33 2017
New Revision: 296537

URL: http://llvm.org/viewvc/llvm-project?rev=296537&view=rev
Log:
[Hexagon] Generate extract instructions more aggressively

Added:
    llvm/trunk/test/CodeGen/Hexagon/bit-extract.ll
Modified:
    llvm/trunk/lib/Target/Hexagon/HexagonBitSimplify.cpp
    llvm/trunk/test/CodeGen/Hexagon/bit-validate-reg.ll
    llvm/trunk/test/CodeGen/Hexagon/fusedandshift.ll
    llvm/trunk/test/CodeGen/Hexagon/vect/vect-shuffle.ll

Modified: llvm/trunk/lib/Target/Hexagon/HexagonBitSimplify.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Hexagon/HexagonBitSimplify.cpp?rev=296537&r1=296536&r2=296537&view=diff
==============================================================================
--- llvm/trunk/lib/Target/Hexagon/HexagonBitSimplify.cpp (original)
+++ llvm/trunk/lib/Target/Hexagon/HexagonBitSimplify.cpp Tue Feb 28 17:27:33 2017
@@ -46,6 +46,12 @@ using namespace llvm;
 
 static cl::opt<bool> PreserveTiedOps("hexbit-keep-tied", cl::Hidden,
   cl::init(true), cl::desc("Preserve subregisters in tied operands"));
+static cl::opt<bool> GenExtract("hexbit-extract", cl::Hidden,
+  cl::init(true), cl::desc("Generate extract instructions"));
+
+static cl::opt<unsigned> MaxExtract("hexbit-max-extract", cl::Hidden,
+  cl::init(UINT_MAX));
+static unsigned CountExtract = 0;
 
 namespace llvm {
 
@@ -1765,6 +1771,8 @@ namespace {
           const BitTracker::RegisterCell &RC);
     bool simplifyTstbit(MachineInstr *MI, BitTracker::RegisterRef RD,
           const BitTracker::RegisterCell &RC);
+    bool simplifyExtractLow(MachineInstr *MI, BitTracker::RegisterRef RD,
+          const BitTracker::RegisterCell &RC, const RegisterSet &AVs);
 
     const HexagonInstrInfo &HII;
     const HexagonRegisterInfo &HRI;
@@ -2208,6 +2216,196 @@ bool BitSimplification::simplifyTstbit(M
   return false;
 }
 
+// Detect whether RD is a bitfield extract (sign- or zero-extended) of
+// some register from the AVs set. Create a new corresponding instruction
+// at the location of MI. The intent is to recognize situations where
+// a sequence of instructions performs an operation that is equivalent to
+// an extract operation, such as a shift left followed by a shift right.
+bool BitSimplification::simplifyExtractLow(MachineInstr *MI,
+      BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC,
+      const RegisterSet &AVs) {
+  if (!GenExtract)
+    return false;
+  if (CountExtract >= MaxExtract)
+    return false;
+  CountExtract++;
+
+  unsigned W = RC.width();
+  unsigned RW = W;
+  unsigned Len;
+  bool Signed;
+
+  // The code is mostly class-independent, except for the part that generates
+  // the extract instruction, and establishes the source register (in case it
+  // needs to use a subregister).
+  const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI);
+  if (FRC != &Hexagon::IntRegsRegClass && FRC != &Hexagon::DoubleRegsRegClass)
+    return false;
+  assert(RD.Sub == 0);
+
+  // Observation:
+  // If the cell has a form of 00..0xx..x with k zeros and n remaining
+  // bits, this could be an extractu of the n bits, but it could also be
+  // an extractu of a longer field which happens to have 0s in the top
+  // bit positions.
+  // The same logic applies to sign-extended fields.
+  //
+  // Do not check for the extended extracts, since it would expand the
+  // search space quite a bit. The search may be expensive as it is.
+
+  const BitTracker::BitValue &TopV = RC[W-1];
+
+  // Eliminate candidates that have self-referential bits, since they
+  // cannot be extracts from other registers. Also, skip registers that
+  // have compile-time constant values.
+  bool IsConst = true;
+  for (unsigned I = 0; I != W; ++I) {
+    const BitTracker::BitValue &V = RC[I];
+    if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg == RD.Reg)
+      return false;
+    IsConst = IsConst && (V.is(0) || V.is(1));
+  }
+  if (IsConst)
+    return false;
+
+  if (TopV.is(0) || TopV.is(1)) {
+    bool S = TopV.is(1);
+    for (--W; W > 0 && RC[W-1].is(S); --W)
+      ;
+    Len = W;
+    Signed = S;
+    // The sign bit must be a part of the field being extended.
+    if (Signed)
+      ++Len;
+  } else {
+    // This could still be a sign-extended extract.
+    assert(TopV.Type == BitTracker::BitValue::Ref);
+    if (TopV.RefI.Reg == RD.Reg || TopV.RefI.Pos == W-1)
+      return false;
+    for (--W; W > 0 && RC[W-1] == TopV; --W)
+      ;
+    // The top bits of RC are copies of TopV. One occurrence of TopV will
+    // be a part of the field.
+    Len = W + 1;
+    Signed = true;
+  }
+
+  // This would be just a copy. It should be handled elsewhere.
+  if (Len == RW)
+    return false;
+
+  DEBUG({
+    dbgs() << __func__ << " on reg: " << PrintReg(RD.Reg, &HRI, RD.Sub)
+           << ", MI: " << *MI;
+    dbgs() << "Cell: " << RC << '\n';
+    dbgs() << "Expected bitfield size: " << Len << " bits, "
+           << (Signed ? "sign" : "zero") << "-extended\n";
+  });
+
+  bool Changed = false;
+
+  for (unsigned R = AVs.find_first(); R != 0; R = AVs.find_next(R)) {
+    const BitTracker::RegisterCell &SC = BT.lookup(R);
+    unsigned SW = SC.width();
+
+    // The source can be longer than the destination, as long as its size is
+    // a multiple of the size of the destination. Also, we would need to be
+    // able to refer to the subregister in the source that would be of the
+    // same size as the destination, but only check the sizes here.
+    if (SW < RW || (SW % RW) != 0)
+      continue;
+
+    // The field can start at any offset in SC as long as it contains Len
+    // bits and does not cross subregister boundary (if the source register
+    // is longer than the destination).
+    unsigned Off = 0;
+    while (Off <= SW-Len) {
+      unsigned OE = (Off+Len)/RW;
+      if (OE != Off/RW) {
+        // The assumption here is that if the source (R) is longer than the
+        // destination, then the destination is a sequence of words of
+        // size RW, and each such word in R can be accessed via a subregister.
+        //
+        // If the beginning and the end of the field cross the subregister
+        // boundary, advance to the next subregister.
+        Off = OE*RW;
+        continue;
+      }
+      if (HBS::isEqual(RC, 0, SC, Off, Len))
+        break;
+      ++Off;
+    }
+
+    if (Off > SW-Len)
+      continue;
+
+    // Found match.
+    unsigned ExtOpc = 0;
+    if (Off == 0) {
+      if (Len == 8)
+        ExtOpc = Signed ? Hexagon::A2_sxtb : Hexagon::A2_zxtb;
+      else if (Len == 16)
+        ExtOpc = Signed ? Hexagon::A2_sxth : Hexagon::A2_zxth;
+      else if (Len < 10 && !Signed)
+        ExtOpc = Hexagon::A2_andir;
+    }
+    if (ExtOpc == 0) {
+      ExtOpc =
+          Signed ? (RW == 32 ? Hexagon::S4_extract  : Hexagon::S4_extractp)
+                 : (RW == 32 ? Hexagon::S2_extractu : Hexagon::S2_extractup);
+    }
+    unsigned SR = 0;
+    // This only recognizes isub_lo and isub_hi.
+    if (RW != SW && RW*2 != SW)
+      continue;
+    if (RW != SW)
+      SR = (Off/RW == 0) ? Hexagon::isub_lo : Hexagon::isub_hi;
+
+    if (!validateReg({R,SR}, ExtOpc, 1))
+      continue;
+
+    // Don't generate the same instruction as the one being optimized.
+    if (MI->getOpcode() == ExtOpc) {
+      // All possible ExtOpc's have the source in operand(1).
+      const MachineOperand &SrcOp = MI->getOperand(1);
+      if (SrcOp.getReg() == R)
+        continue;
+    }
+
+    DebugLoc DL = MI->getDebugLoc();
+    MachineBasicBlock &B = *MI->getParent();
+    unsigned NewR = MRI.createVirtualRegister(FRC);
+    auto MIB = BuildMI(B, MI, DL, HII.get(ExtOpc), NewR)
+                  .addReg(R, 0, SR);
+    switch (ExtOpc) {
+      case Hexagon::A2_sxtb:
+      case Hexagon::A2_zxtb:
+      case Hexagon::A2_sxth:
+      case Hexagon::A2_zxth:
+        break;
+      case Hexagon::A2_andir:
+        MIB.addImm((1u << Len) - 1);
+        break;
+      case Hexagon::S4_extract:
+      case Hexagon::S2_extractu:
+      case Hexagon::S4_extractp:
+      case Hexagon::S2_extractup:
+        MIB.addImm(Len)
+           .addImm(Off);
+        break;
+      default:
+        llvm_unreachable("Unexpected opcode");
+    }
+
+    HBS::replaceReg(RD.Reg, NewR, MRI);
+    BT.put(BitTracker::RegisterRef(NewR), RC);
+    Changed = true;
+    break;
+  }
+
+  return Changed;
+}
+
 bool BitSimplification::processBlock(MachineBasicBlock &B,
       const RegisterSet &AVs) {
   if (!BT.reached(&B))
@@ -2245,12 +2443,14 @@ bool BitSimplification::processBlock(Mac
 
     if (FRC->getID() == Hexagon::DoubleRegsRegClassID) {
       bool T = genPackhl(MI, RD, RC);
+      T = T || simplifyExtractLow(MI, RD, RC, AVB);
       Changed |= T;
       continue;
     }
 
     if (FRC->getID() == Hexagon::IntRegsRegClassID) {
-      bool T = genExtractHalf(MI, RD, RC);
+      bool T = simplifyExtractLow(MI, RD, RC, AVB);
+      T = T || genExtractHalf(MI, RD, RC);
       T = T || genCombineHalf(MI, RD, RC);
       T = T || genExtractLow(MI, RD, RC);
       Changed |= T;

Added: llvm/trunk/test/CodeGen/Hexagon/bit-extract.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Hexagon/bit-extract.ll?rev=296537&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/Hexagon/bit-extract.ll (added)
+++ llvm/trunk/test/CodeGen/Hexagon/bit-extract.ll Tue Feb 28 17:27:33 2017
@@ -0,0 +1,75 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+
+target triple = "hexagon"
+
+; CHECK-LABEL: ua
+; CHECK: extractu(r0,#26,#0)
+define i32 @ua(i32 %x) local_unnamed_addr #0 {
+entry:
+  %shl = and i32 %x, 67108863
+  ret i32 %shl
+}
+
+; CHECK-LABEL: ub
+; CHECK: extractu(r0,#16,#4)
+define i32 @ub(i32 %x) local_unnamed_addr #0 {
+entry:
+  %0 = lshr i32 %x, 4
+  %shr = and i32 %0, 65535
+  ret i32 %shr
+}
+
+; CHECK-LABEL: uc
+; CHECK: extractu(r0,#24,#0)
+define i32 @uc(i32 %x) local_unnamed_addr #0 {
+entry:
+  %shl = and i32 %x, 16777215
+  ret i32 %shl
+}
+
+; CHECK-LABEL: ud
+; CHECK: extractu(r0,#16,#8)
+define i32 @ud(i32 %x) local_unnamed_addr #0 {
+entry:
+  %bf.lshr = lshr i32 %x, 8
+  %bf.clear = and i32 %bf.lshr, 65535
+  ret i32 %bf.clear
+}
+
+; CHECK-LABEL: sa
+; CHECK: extract(r0,#26,#0)
+define i32 @sa(i32 %x) local_unnamed_addr #0 {
+entry:
+  %shl = shl i32 %x, 6
+  %shr = ashr exact i32 %shl, 6
+  ret i32 %shr
+}
+
+; CHECK-LABEL: sb
+; CHECK: extract(r0,#16,#4)
+define i32 @sb(i32 %x) local_unnamed_addr #0 {
+entry:
+  %shl = shl i32 %x, 12
+  %shr = ashr i32 %shl, 16
+  ret i32 %shr
+}
+
+; CHECK-LABEL: sc
+; CHECK: extract(r0,#24,#0)
+define i32 @sc(i32 %x) local_unnamed_addr #0 {
+entry:
+  %shl = shl i32 %x, 8
+  %shr = ashr exact i32 %shl, 8
+  ret i32 %shr
+}
+
+; CHECK-LABEL: sd
+; CHECK: extract(r0,#16,#8)
+define i32 @sd(i32 %x) local_unnamed_addr #0 {
+entry:
+  %bf.shl = shl i32 %x, 8
+  %bf.ashr = ashr i32 %bf.shl, 16
+  ret i32 %bf.ashr
+}
+
+attributes #0 = { noinline norecurse nounwind readnone "target-cpu"="hexagonv60" "target-features"="-hvx,-hvx-double,-long-calls" }

Modified: llvm/trunk/test/CodeGen/Hexagon/bit-validate-reg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Hexagon/bit-validate-reg.ll?rev=296537&r1=296536&r2=296537&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Hexagon/bit-validate-reg.ll (original)
+++ llvm/trunk/test/CodeGen/Hexagon/bit-validate-reg.ll Tue Feb 28 17:27:33 2017
@@ -1,10 +1,13 @@
-; RUN: llc -march=hexagon < %s | FileCheck %s
+; RUN: llc -march=hexagon -hexbit-extract=0 < %s | FileCheck %s
 
 ; Make sure we don't generate zxtb to transfer a predicate register into
 ; a general purpose register.
 
 ; CHECK: r0 = p0
 ; CHECK-NOT: zxtb(p
+; CHECK-NOT: and(p
+; CHECK-NOT: extract(p
+; CHECK-NOT: extractu(p
 
 target triple = "hexagon"
 

Modified: llvm/trunk/test/CodeGen/Hexagon/fusedandshift.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Hexagon/fusedandshift.ll?rev=296537&r1=296536&r2=296537&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Hexagon/fusedandshift.ll (original)
+++ llvm/trunk/test/CodeGen/Hexagon/fusedandshift.ll Tue Feb 28 17:27:33 2017
@@ -1,4 +1,4 @@
-; RUN: llc -march=hexagon -hexagon-extract=0 < %s | FileCheck %s
+; RUN: llc -march=hexagon -hexagon-extract=0 -hexbit-extract=0 < %s | FileCheck %s
 ; Check that we generate fused logical and with shift instruction.
 ; Disable "extract" generation, since it may eliminate the and/lsr.
 

Modified: llvm/trunk/test/CodeGen/Hexagon/vect/vect-shuffle.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Hexagon/vect/vect-shuffle.ll?rev=296537&r1=296536&r2=296537&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Hexagon/vect/vect-shuffle.ll (original)
+++ llvm/trunk/test/CodeGen/Hexagon/vect/vect-shuffle.ll Tue Feb 28 17:27:33 2017
@@ -1,7 +1,7 @@
 ; RUN: llc -march=hexagon -mcpu=hexagonv5 -disable-hsdr < %s | FileCheck %s
 
 ; Check that store is post-incremented.
-; CHECK-NOT: extractu
+; CHECK-NOT: extractu(r{{[0-9]+}},#32,
 ; CHECK-NOT: insert
 target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32"
 target triple = "hexagon"




More information about the llvm-commits mailing list