[PATH][CodeGen] Fix CombineToPostIndexedLoadStore in DAGCombiner.cpp
Francois de Ferriere
francois.de-ferriere at st.com
Fri Apr 3 06:13:21 PDT 2015
Hi,
I post a patch for a missed optimization in the DAGCombiner.cpp LLVM
file for the selection of Post Indexed Load and Store operations.
The code in the function CombineToPostIndexedLoadStore that checks
the uses of an ADD/SUB operation does not correctly check the real uses.
Also, in the function canFoldInAddressingMode, VT is computed as the
type of the destination/source of a LOAD/STORE operations, instead of
the memory type of the operation. On targets which have a scaling factor
on the offset of the LOAD/STORE operations, the function may return
false for actually valid cases.
I could reproduce this problem on the ARM target, a test case that
exposes this problem is attached to this mail.
Thanks to review it and commit it when it is OK.
Regards,
- François.
-------------- next part --------------
Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp (revision 233547)
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp (working copy)
@@ -8560,11 +8560,11 @@
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
return false;
- VT = Use->getValueType(0);
+ VT = LD->getMemoryVT();
} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
return false;
- VT = ST->getValue().getValueType();
+ VT = ST->getMemoryVT();
} else
return false;
@@ -8852,8 +8852,7 @@
return false;
for (SDNode *Op : Ptr.getNode()->uses()) {
- if (Op == N ||
- (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
+ if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
continue;
SDValue BasePtr;
@@ -8876,28 +8875,18 @@
continue;
// Check for #1.
- bool TryNext = false;
- for (SDNode *Use : BasePtr.getNode()->uses()) {
- if (Use == Ptr.getNode())
- continue;
-
- // If all the uses are load / store addresses, then don't do the
- // transformation.
- if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
- bool RealUse = false;
- for (SDNode *UseUse : Use->uses()) {
- if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
- RealUse = true;
- }
-
- if (!RealUse) {
- TryNext = true;
- break;
- }
+ // Look for a RealUse, i.e. one use that is not a load / store op, or one
+ // that cannot be folded as addressing mode
+ // If one use is not a load / store address, then do the transformation.
+ bool RealUse = false;
+ for (SDNode *OpUse : Op->uses()) {
+ if (!canFoldInAddressingMode(Op, OpUse, DAG, TLI)) {
+ RealUse = true;
+ break;
}
}
- if (TryNext)
+ if (!RealUse)
continue;
// Check for #2
-------------- next part --------------
; Test that checks that automod addressing mode is selected
;
; llc -march=arm automod_test.ll
;
; ======================================================
; Without the fix, the generated code is the following :
;
; ldrh r3, [r2, #2]
; strh r3, [r1, #-2]
; ldrh r3, [r2]
; sub r2, r2, #6
; strh r3, [r1]
; ldr r3, [r0], #48
; add r1, r1, #6
; cmp r3, #0
; bne .LBB0_1
;
; With the patch, post modifying addressing modes are selected :
;
; ldrh r3, [r2, #2]
; strh r3, [r1, #-2]
; ldrh r3, [r2], #-6
; strh r3, [r1], #6
; ldr r3, [r0], #48
; cmp r3, #0
; bne .LBB0_1
; ======================================================
@input_tab64 = common global [32 x i16] zeroinitializer, align 2
@output_tab64 = common global [32 x i16] zeroinitializer, align 2
; Function Attrs: nounwind
define void @compute(i32* nocapture readonly %IDX) #0 {
entry:
%0 = load i32* %IDX, align 4
%tobool14 = icmp eq i32 %0, 0
br i1 %tobool14, label %for.end, label %for.body
for.body: ; preds = %entry, %for.body
%i.015 = phi i32 [ %add8, %for.body ], [ 0, %entry ]
%sub = sub nsw i32 32, %i.015
%sub1 = add nsw i32 %sub, -1
%arrayidx2 = getelementptr inbounds [32 x i16]* @input_tab64, i32 0, i32 %sub1
%1 = load i16* %arrayidx2, align 2
%arrayidx3 = getelementptr inbounds [32 x i16]* @output_tab64, i32 0, i32 %i.015
store i16 %1, i16* %arrayidx3, align 2
%sub5 = add nsw i32 %sub, -2
%arrayidx6 = getelementptr inbounds [32 x i16]* @input_tab64, i32 0, i32 %sub5
%2 = load i16* %arrayidx6, align 2
%add = add nsw i32 %i.015, 1
%arrayidx7 = getelementptr inbounds [32 x i16]* @output_tab64, i32 0, i32 %add
store i16 %2, i16* %arrayidx7, align 2
%add8 = add nsw i32 %i.015, 3
%shl = shl i32 %add8, 2
%arrayidx = getelementptr inbounds i32* %IDX, i32 %shl
%3 = load i32* %arrayidx, align 4
%tobool = icmp eq i32 %3, 0
br i1 %tobool, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
ret void
}
More information about the llvm-commits
mailing list