[PATCH] D37514: [PowerPC] support ZERO_EXTEND in tryBitPermutation

Hiroshi Inoue via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 8 05:32:54 PDT 2017


inouehrs updated this revision to Diff 114342.
inouehrs retitled this revision from "[PowerPC] DAGCombine for better exploitation of rotate-and-mask instruction" to "[PowerPC] support ZERO_EXTEND in tryBitPermutation".
inouehrs edited the summary of this revision.
inouehrs added a comment.

Reimplemented the optimization in tryBitPermutation instead of adding a new DAGCombine rule, as @hfinkel suggested.


https://reviews.llvm.org/D37514

Files:
  lib/Target/PowerPC/PPCISelDAGToDAG.cpp
  test/CodeGen/PowerPC/zext-bitperm.ll


Index: test/CodeGen/PowerPC/zext-bitperm.ll
===================================================================
--- /dev/null
+++ test/CodeGen/PowerPC/zext-bitperm.ll
@@ -0,0 +1,23 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc-unknown-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-linux-gnu | FileCheck %s
+
+; Test case for the ZERO_EXTEND handling in tryBitPermutation.
+; We expect the mask and rotate to be folded into a single rlwinm instruction.
+
+define zeroext i32 @func(i32* %p, i32 zeroext %i) {
+; CHECK-LABEL: @func
+; CHECK: addi [[REG1:[0-9]+]], 4, 1
+; CHECK: rlwinm [[REG2:[0-9]+]], [[REG1]], 2, 22, 29
+; CHECK-NOT: sldi
+; CHECK: lwzx 3, 3, [[REG2]]
+; CHECK: blr
+entry:
+  %add = add i32 %i, 1
+  %and = and i32 %add, 255
+  %idxprom = zext i32 %and to i64
+  %arrayidx = getelementptr inbounds i32, i32* %p, i64 %idxprom
+  %0 = load i32, i32* %arrayidx, align 4
+  ret i32 %0
+}
+
Index: lib/Target/PowerPC/PPCISelDAGToDAG.cpp
===================================================================
--- lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -1103,6 +1103,65 @@
 
       return std::make_pair(Interesting = true, &Bits);
     }
+    case ISD::ZERO_EXTEND: {
+      // Here we try to increase the opportunity by reordering ZEXT and ANDI.
+      // We want to avoid ZEXT between two nodes that can be folded.
+      //
+      // For example, we modify these nodes
+      //           t9: i32 = add t7, Constant:i32<1>
+      //         t11: i32 = and t9, Constant:i32<255>
+      //       t12: i64 = zero_extend t11
+      //     t14: i64 = shl t12, Constant:i64<2>
+      // into
+      //           t9: i32 = add t7, Constant:i32<1>
+      //         t25: i64 = any_extend t9
+      //       t27: i64 = and t25, Constant:i64<255>
+      //     t14: i64 = shl t27, Constant:i64<2>
+      // to fold t27 and t14 into a rotate-and-mask instruction.
+      // Such cases often occur in array accesses with a logical AND operation
+      // in the index, e.g. array[i & 0xFF].
+      //
+      // We modify nodes only if the first operand of the AND node (t9 in the
+      // example) is not an opcode supported by tryBitPermutation, to
+      // avoid disturbing another tryBitPermutation.
+
+      auto IsSupprtedInBitPermutation = [&](unsigned Opcode) {
+        return (Opcode == ISD::AND || Opcode == ISD::OR  ||
+                Opcode == ISD::SHL || Opcode == ISD::SRL ||
+                Opcode == ISD::ROTL);
+      };
+
+      // We only support zero extension from i32 to i64.
+      // Also, we do not optimize if the ZEXT or ANDI node has multiple uses.
+      if (V.getValueType() != MVT::i64 || !V.hasOneUse())
+        break;
+
+      SDValue AndVal = V.getOperand(0);
+      if (AndVal.getOpcode() != ISD::AND ||
+          AndVal.getValueType() != MVT::i32 ||
+          !AndVal.hasOneUse() ||
+          !isa<ConstantSDNode>(AndVal.getOperand(1)) ||
+          IsSupprtedInBitPermutation(AndVal.getOperand(0).getOpcode()))
+        break;
+
+      uint64_t Mask = AndVal.getConstantOperandVal(1);
+      if ((Mask & 0xFFFFFFFF00000000uLL) != 0)
+        break;
+
+      SDLoc DL(V.getNode());
+      SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, DL, MVT::i32);
+      SDValue Mask64 = CurDAG->getConstant(Mask, DL, MVT::i64);
+      SDValue ImDef = SDValue(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, DL,
+                                                     MVT::i64), 0);
+      SDValue ExtVal = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, DL,
+                                                     MVT::i64, ImDef,
+                                                     AndVal.getOperand(0),
+                                                     SubRegIdx), 0);
+      SDValue And64Val = CurDAG->getNode(ISD::AND, DL, MVT::i64, ExtVal,
+                                         Mask64);
+
+      return getValueBits(And64Val, NumBits);
+      }
     }
 
     for (unsigned i = 0; i < NumBits; ++i)


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D37514.114342.patch
Type: text/x-patch
Size: 4142 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20170908/643103f4/attachment.bin>


More information about the llvm-commits mailing list