[llvm] r296196 - [Hexagon] Undo shift folding where it could simplify addressing mode
Krzysztof Parzyszek via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 24 15:34:24 PST 2017
Author: kparzysz
Date: Fri Feb 24 17:34:24 2017
New Revision: 296196
URL: http://llvm.org/viewvc/llvm-project?rev=296196&view=rev
Log:
[Hexagon] Undo shift folding where it could simplify addressing mode
For example, avoid this form (one shift, but it needs a constant-extended AND):
r0 = and(##536870908,lsr(r0,#3))
r0 = memw(r1+r0<<#0)
in favor of (two shifts):
r0 = lsr(r0,#5)
r0 = memw(r1+r0<<#2)
Added:
llvm/trunk/test/CodeGen/Hexagon/undo-dag-shift.ll
Modified:
llvm/trunk/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
Modified: llvm/trunk/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp?rev=296196&r1=296195&r2=296196&view=diff
==============================================================================
--- llvm/trunk/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp Fri Feb 24 17:34:24 2017
@@ -1023,8 +1023,8 @@ void HexagonDAGToDAGISel::PreprocessISel
}
}
- // Transform: (store ch addr (add x (add (shl y c) e)))
- // to: (store ch addr (add x (shl (add y d) c))),
+ // Transform: (store ch val (add x (add (shl y c) e)))
+ // to: (store ch val (add x (shl (add y d) c))),
// where e = (shl d c) for some integer d.
// The purpose of this is to enable generation of loads/stores with
// shifted addressing mode, i.e. mem(x+y<<#c). For that, the shift
@@ -1033,7 +1033,7 @@ void HexagonDAGToDAGISel::PreprocessISel
if (I->getOpcode() != ISD::STORE)
continue;
- // I matched: (store ch addr Off)
+ // I matched: (store ch val Off)
SDValue Off = I->getOperand(2);
// Off needs to match: (add x (add (shl y c) (shl d c)))
if (Off.getOpcode() != ISD::ADD)
@@ -1076,6 +1076,78 @@ void HexagonDAGToDAGISel::PreprocessISel
ReplaceNode(T0.getNode(), NewShl.getNode());
}
+ // Transform (load/store ch .. (add x (and (srl y c) Mask)))
+ // to: (load/store ch .. (add x (shl (srl y d) d-c)))
+ // where
+ // Mask = 00..0 111..1 0.0
+ // | | +-- d-c 0s, and d-c is 0, 1 or 2.
+ // | +-------- 1s
+ // +-------------- at most c 0s
+ // Motivating example:
+ // DAG combiner optimizes (add x (shl (srl y 5) 2))
+ // to (add x (and (srl y 3) 1FFFFFFC))
+ // which results in a constant-extended and(##...,lsr). This transformation
+ // undoes this simplification for cases where the shl can be folded into
+ // an addressing mode.
+ for (SDNode *N : Nodes) {
+ unsigned Opc = N->getOpcode();
+ if (Opc != ISD::LOAD && Opc != ISD::STORE)
+ continue;
+ SDValue Addr = Opc == ISD::LOAD ? N->getOperand(1) : N->getOperand(2);
+ // Addr must match: (add x T0)
+ if (Addr.getOpcode() != ISD::ADD)
+ continue;
+ SDValue T0 = Addr.getOperand(1);
+ // T0 must match: (and T1 Mask)
+ if (T0.getOpcode() != ISD::AND)
+ continue;
+
+ // We have an AND.
+ //
+ // Check the first operand. It must be: (srl y c).
+ SDValue S = T0.getOperand(0);
+ if (S.getOpcode() != ISD::SRL)
+ continue;
+ ConstantSDNode *SN = dyn_cast<ConstantSDNode>(S.getOperand(1).getNode());
+ if (SN == nullptr)
+ continue;
+ if (SN->getAPIntValue().getBitWidth() != 32)
+ continue;
+ uint32_t CV = SN->getZExtValue();
+
+ // Check the second operand: the supposed mask.
+ ConstantSDNode *MN = dyn_cast<ConstantSDNode>(T0.getOperand(1).getNode());
+ if (MN == nullptr)
+ continue;
+ if (MN->getAPIntValue().getBitWidth() != 32)
+ continue;
+ uint32_t Mask = MN->getZExtValue();
+ // Examine the mask.
+ uint32_t TZ = countTrailingZeros(Mask);
+ uint32_t M1 = countTrailingOnes(Mask >> TZ);
+ uint32_t LZ = countLeadingZeros(Mask);
+ // Trailing zeros + middle ones + leading zeros must equal the width.
+ if (TZ + M1 + LZ != 32)
+ continue;
+ // The number of trailing zeros will be encoded in the addressing mode.
+ if (TZ > 2)
+ continue;
+ // The number of leading zeros must be at most c.
+ if (LZ > CV)
+ continue;
+
+ // All looks good.
+ SDValue Y = S.getOperand(0);
+ EVT VT = Addr.getValueType();
+ SDLoc dl(S);
+ // TZ = D-C, so D = TZ+C.
+ SDValue D = DAG.getConstant(TZ+CV, dl, VT);
+ SDValue DC = DAG.getConstant(TZ, dl, VT);
+ SDValue NewSrl = DAG.getNode(ISD::SRL, dl, VT, Y, D);
+ SDValue NewShl = DAG.getNode(ISD::SHL, dl, VT, NewSrl, DC);
+ ReplaceNode(T0.getNode(), NewShl.getNode());
+ }
+
if (EnableAddressRebalancing) {
rebalanceAddressTrees();
Added: llvm/trunk/test/CodeGen/Hexagon/undo-dag-shift.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Hexagon/undo-dag-shift.ll?rev=296196&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/Hexagon/undo-dag-shift.ll (added)
+++ llvm/trunk/test/CodeGen/Hexagon/undo-dag-shift.ll Fri Feb 24 17:34:24 2017
@@ -0,0 +1,59 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+
+; DAG combiner folds sequences of shifts, which can sometimes obscure
+; optimization opportunities. For example
+;
+; unsigned int c(unsigned int b, unsigned int *a) {
+; unsigned int bitidx = b >> 5;
+; return a[bitidx];
+; }
+;
+; produces
+; (add x (shl (srl y 5) 2))
+; which is then folded into
+; (add x (and (srl y 3) 1FFFFFFC))
+;
+; That results in a constant-extended and:
+; r0 = and(##536870908,lsr(r0,#3))
+; r0 = memw(r1+r0<<#0)
+; whereas
+; r0 = lsr(r0,#5)
+; r0 = memw(r1+r0<<#2)
+; is more desirable.
+
+target triple = "hexagon"
+
+; CHECK-LABEL: load_0
+; CHECK: memw(r{{[0-9]+}}+r{{[0-9]}}<<#2)
+define i32 @load_0(i32 %b, i32* nocapture readonly %a) #0 {
+entry:
+ %shr = lshr i32 %b, 5
+ %arrayidx = getelementptr inbounds i32, i32* %a, i32 %shr
+ %0 = load i32, i32* %arrayidx, align 4
+ ret i32 %0
+}
+
+; This would require r0<<#3, which is not legal.
+; CHECK-LABEL: load_1
+; CHECK: memw(r{{[0-9]+}}+r{{[0-9]}}<<#0)
+define i32 @load_1(i32 %b, [3 x i32]* nocapture readonly %a) #0 {
+entry:
+ %shr = lshr i32 %b, 5
+ %arrayidx = getelementptr inbounds [3 x i32], [3 x i32]* %a, i32 %shr, i32 0
+ %0 = load i32, i32* %arrayidx, align 4
+ ret i32 %0
+}
+
+; CHECK-LABEL: store_0
+; CHECK: memw(r{{[0-9]+}}+r{{[0-9]}}<<#2)
+define void @store_0(i32 %b, i32* nocapture %a, i32 %v) #1 {
+entry:
+ %shr = lshr i32 %b, 5
+ %arrayidx = getelementptr inbounds i32, i32* %a, i32 %shr
+ store i32 %v, i32* %arrayidx, align 4
+ ret void
+}
+
+attributes #0 = { norecurse nounwind readonly "target-cpu"="hexagonv60" "target-features"="-hvx,-hvx-double,-long-calls" }
+attributes #1 = { norecurse nounwind "target-cpu"="hexagonv60" "target-features"="-hvx,-hvx-double,-long-calls" }
+
More information about the llvm-commits
mailing list