[llvm] r351310 - [DAGCombine] Fix ReduceLoadWidth for shifted offsets

Wed Jan 16 00:40:12 PST 2019

Author: sam_parker
Date: Wed Jan 16 00:40:12 2019
New Revision: 351310

URL: http://llvm.org/viewvc/llvm-project?rev=351310&view=rev
Log:
[DAGCombine] Fix ReduceLoadWidth for shifted offsets

ReduceLoadWidth can trigger using a shifted mask is used and this
requires that the function return a shl node to correct for the
offset. However, the way that this was implemented meant that the
returned result could be an existing node, which would be incorrect.
This fixes the method of inserting the new node and replacing uses.

Differential Revision: https://reviews.llvm.org/D50432

Modified:
    llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/trunk/test/CodeGen/ARM/and-load-combine.ll

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=351310&r1=351309&r2=351310&view=diff
==============================================================================

--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Wed Jan 16 00:40:12 2019
@@ -4724,9 +4724,8 @@ SDValue DAGCombiner::visitAND(SDNode *N)
     if (SDValue Res = ReduceLoadWidth(N)) {
       LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
         ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);
-
       AddToWorklist(N);
-      CombineTo(LN0, Res, Res.getValue(1));
+      DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 0), Res);
       return SDValue(N, 0);
     }
   }
@@ -9486,18 +9485,15 @@ SDValue DAGCombiner::ReduceLoadWidth(SDN
     if (DAG.getDataLayout().isBigEndian())
       ShAmt = AdjustBigEndianShift(ShAmt);
 
-    // We're using a shifted mask, so the load now has an offset. This means we
-    // now need to shift right the mask to match the new load and then shift
-    // right the result of the AND.
-    const APInt &Mask = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
-    APInt ShiftedMask = Mask.lshr(ShAmt);
-    DAG.UpdateNodeOperands(N, Result, DAG.getConstant(ShiftedMask, DL, VT));
+    // We're using a shifted mask, so the load now has an offset. This means
+    // that data has been loaded into the lower bytes than it would have been
+    // before, so we need to shl the loaded data into the correct position in the
+    // register.
     SDValue ShiftC = DAG.getConstant(ShAmt, DL, VT);
-    SDValue Shifted = DAG.getNode(ISD::SHL, DL, VT, SDValue(N, 0),
-                                  ShiftC);
-    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Shifted);
-    DAG.UpdateNodeOperands(Shifted.getNode(), SDValue(N, 0), ShiftC);
+    Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
+    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
   }
+
   // Return the new loaded value.
   return Result;
 }

Modified: llvm/trunk/test/CodeGen/ARM/and-load-combine.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/and-load-combine.ll?rev=351310&r1=351309&r2=351310&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/and-load-combine.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/and-load-combine.ll Wed Jan 16 00:40:12 2019
@@ -1549,3 +1549,39 @@ define arm_aapcscc i64 @test26(i64* noca
   %and = and i64 %1, -281474976710656
   ret i64 %and
 }
+
+; ARM-LABEL: test27:
+; ARM:       @ %bb.0:
+; ARM-NEXT:    ldrb r1, [r0, #1]
+; ARM-NEXT:    lsl r1, r1, #16
+; ARM-NEXT:    str r1, [r0]
+; ARM-NEXT:    bx lr
+;
+; ARMEB-LABEL: test27:
+; ARMEB:     @ %bb.0:
+; ARMEB-NEXT:  ldrb r1, [r0, #2]
+; ARMEB-NEXT:  lsl r1, r1, #16
+; ARMEB-NEXT:  str r1, [r0]
+; ARMEB-NEXT:  bx lr
+;
+; THUMB1-LABEL: test27:
+; THUMB1:     @ %bb.0:
+; THUMB1-NEXT:  ldrb r1, [r0, #1]
+; THUMB1-NEXT:  lsls r1, r1, #16
+; THUMB1-NEXT:  str r1, [r0]
+; THUMB1-NEXT:  bx lr
+;
+; THUMB2-LABEL: test27:
+; THUMB2:       @ %bb.0:
+; THUMB2-NEXT:    ldrb r1, [r0, #1]
+; THUMB2-NEXT:    lsls r1, r1, #16
+; THUMB2-NEXT:    str r1, [r0]
+; THUMB2-NEXT:    bx lr
+define void @test27(i32* nocapture %ptr) {
+entry:
+  %0 = load i32, i32* %ptr, align 4
+  %and = and i32 %0, 65280
+  %shl = shl i32 %and, 8
+  store i32 %shl, i32* %ptr, align 4
+  ret void
+}