[llvm] r225203 - [PowerPC] Fold i1 extensions with other ops
Hal Finkel
hfinkel at anl.gov
Mon Jan 5 13:10:24 PST 2015
Author: hfinkel
Date: Mon Jan 5 15:10:24 2015
New Revision: 225203
URL: http://llvm.org/viewvc/llvm-project?rev=225203&view=rev
Log:
[PowerPC] Fold i1 extensions with other ops
Consider this function from our README.txt file:
int foo(int a, int b) { return (a < b) << 4; }
We now explicitly track CR bits by default, so the comment in the README.txt
about not really having a SETCC is no longer accurate, but we did generate this
somewhat silly code:
cmpw 0, 3, 4
li 3, 0
li 12, 1
isel 3, 12, 3, 0
sldi 3, 3, 4
blr
which generates the zext as a select between 0 and 1, and then shifts the
result by a constant amount. Here we preprocess the DAG in order to fold the
results of operations on an extension of an i1 value into the SELECT_I[48]
pseudo instruction when the resulting constant can be materialized using one
instruction (just like the 0 and 1). This was not implemented as a DAGCombine
because the resulting code would have been anti-canonical and depends on
replacing chained user nodes, which does not fit well into the lowering
paradigm. Now we generate:
cmpw 0, 3, 4
li 3, 0
li 12, 16
isel 3, 12, 3, 0
blr
which is less silly.
Added:
llvm/trunk/test/CodeGen/PowerPC/i1-ext-fold.ll
Modified:
llvm/trunk/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
llvm/trunk/lib/Target/PowerPC/README.txt
Modified: llvm/trunk/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelDAGToDAG.cpp?rev=225203&r1=225202&r2=225203&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCISelDAGToDAG.cpp Mon Jan 5 15:10:24 2015
@@ -217,6 +217,7 @@ private:
void PeepholeCROps();
SDValue combineToCMPB(SDNode *N);
+ void foldBoolExts(SDValue &Res, SDNode *&N);
bool AllUsersSelectZero(SDNode *N);
void SwapAllSelectUsers(SDNode *N);
@@ -3173,6 +3174,73 @@ SDValue PPCDAGToDAGISel::combineToCMPB(S
return Res;
}
+// When CR bit registers are enabled, an extension of an i1 variable to a i32
+// or i64 value is lowered in terms of a SELECT_I[48] operation, and thus
+// involves constant materialization of a 0 or a 1 or both. If the result of
+// the extension is then operated upon by some operator that can be constant
+// folded with a constant 0 or 1, and that constant can be materialized using
+// only one instruction (like a zero or one), then we should fold in those
+// operations with the select.
+void PPCDAGToDAGISel::foldBoolExts(SDValue &Res, SDNode *&N) {
+ if (!PPCSubTarget->useCRBits())
+ return;
+
+ if (N->getOpcode() != ISD::ZERO_EXTEND &&
+ N->getOpcode() != ISD::SIGN_EXTEND &&
+ N->getOpcode() != ISD::ANY_EXTEND)
+ return;
+
+ if (N->getOperand(0).getValueType() != MVT::i1)
+ return;
+
+ if (!N->hasOneUse())
+ return;
+
+ SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+ SDValue Cond = N->getOperand(0);
+ SDValue ConstTrue =
+ CurDAG->getConstant(N->getOpcode() == ISD::SIGN_EXTEND ? -1 : 1, VT);
+ SDValue ConstFalse = CurDAG->getConstant(0, VT);
+
+ do {
+ SDNode *User = *N->use_begin();
+ if (User->getNumOperands() != 2)
+ break;
+
+ auto TryFold = [this, N, User](SDValue Val) {
+ SDValue UserO0 = User->getOperand(0), UserO1 = User->getOperand(1);
+ SDValue O0 = UserO0.getNode() == N ? Val : UserO0;
+ SDValue O1 = UserO1.getNode() == N ? Val : UserO1;
+
+ return CurDAG->FoldConstantArithmetic(User->getOpcode(),
+ User->getValueType(0),
+ O0.getNode(), O1.getNode());
+ };
+
+ SDValue TrueRes = TryFold(ConstTrue);
+ if (!TrueRes)
+ break;
+ SDValue FalseRes = TryFold(ConstFalse);
+ if (!FalseRes)
+ break;
+
+ // For us to materialize these using one instruction, we must be able to
+ // represent them as signed 16-bit integers.
+ uint64_t True = cast<ConstantSDNode>(TrueRes)->getZExtValue(),
+ False = cast<ConstantSDNode>(FalseRes)->getZExtValue();
+ if (!isInt<16>(True) || !isInt<16>(False))
+ break;
+
+ // We can replace User with a new SELECT node, and try again to see if we
+ // can fold the select with its user.
+ Res = CurDAG->getSelect(dl, User->getValueType(0), Cond, TrueRes, FalseRes);
+ N = User;
+ ConstTrue = TrueRes;
+ ConstFalse = FalseRes;
+ } while (N->hasOneUse());
+}
+
void PPCDAGToDAGISel::PreprocessISelDAG() {
SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
++Position;
@@ -3191,6 +3259,9 @@ void PPCDAGToDAGISel::PreprocessISelDAG(
break;
}
+ if (!Res)
+ foldBoolExts(Res, N);
+
if (Res) {
DEBUG(dbgs() << "PPC DAG preprocessing replacing:\nOld: ");
DEBUG(N->dump(CurDAG));
Modified: llvm/trunk/lib/Target/PowerPC/README.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/README.txt?rev=225203&r1=225202&r2=225203&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/README.txt (original)
+++ llvm/trunk/lib/Target/PowerPC/README.txt Mon Jan 5 15:10:24 2015
@@ -252,23 +252,6 @@ anything though, because the compares st
===-------------------------------------------------------------------------===
-We should custom expand setcc instead of pretending that we have it. That
-would allow us to expose the access of the crbit after the mfcr, allowing
-that access to be trivially folded into other ops. A simple example:
-
-int foo(int a, int b) { return (a < b) << 4; }
-
-compiles into:
-
-_foo:
- cmpw cr7, r3, r4
- mfcr r2, 1
- rlwinm r2, r2, 29, 31, 31
- slwi r3, r2, 4
- blr
-
-===-------------------------------------------------------------------------===
-
Fold add and sub with constant into non-extern, non-weak addresses so this:
static int a;
Added: llvm/trunk/test/CodeGen/PowerPC/i1-ext-fold.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/i1-ext-fold.ll?rev=225203&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/i1-ext-fold.ll (added)
+++ llvm/trunk/test/CodeGen/PowerPC/i1-ext-fold.ll Mon Jan 5 15:10:24 2015
@@ -0,0 +1,54 @@
+; RUN: llc -mcpu=pwr7 < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind readnone
+define signext i32 @foo(i32 signext %a, i32 signext %b) #0 {
+entry:
+ %cmp = icmp slt i32 %a, %b
+ %conv = zext i1 %cmp to i32
+ %shl = shl nuw nsw i32 %conv, 4
+ ret i32 %shl
+
+; CHECK-LABEL: @foo
+; CHECK-DAG: cmpw
+; CHECK-DAG: li [[REG1:[0-9]+]], 0
+; CHECK-DAG: li [[REG2:[0-9]+]], 16
+; CHECK: isel 3, [[REG2]], [[REG1]],
+; CHECK: blr
+}
+
+; Function Attrs: nounwind readnone
+define signext i32 @foo2(i32 signext %a, i32 signext %b) #0 {
+entry:
+ %cmp = icmp slt i32 %a, %b
+ %conv = zext i1 %cmp to i32
+ %shl = shl nuw nsw i32 %conv, 4
+ %add1 = or i32 %shl, 5
+ ret i32 %add1
+
+; CHECK-LABEL: @foo2
+; CHECK-DAG: cmpw
+; CHECK-DAG: li [[REG1:[0-9]+]], 5
+; CHECK-DAG: li [[REG2:[0-9]+]], 21
+; CHECK: isel 3, [[REG2]], [[REG1]],
+; CHECK: blr
+}
+
+; Function Attrs: nounwind readnone
+define signext i32 @foo3(i32 signext %a, i32 signext %b) #0 {
+entry:
+ %cmp = icmp sle i32 %a, %b
+ %conv = zext i1 %cmp to i32
+ %shl = shl nuw nsw i32 %conv, 4
+ ret i32 %shl
+
+; CHECK-LABEL: @foo3
+; CHECK-DAG: cmpw
+; CHECK-DAG: li [[REG1:[0-9]+]], 16
+; CHECK: isel 3, 0, [[REG1]],
+; CHECK: blr
+}
+
+attributes #0 = { nounwind readnone }
+
More information about the llvm-commits
mailing list