[llvm] r328252 - [DAGCombiner] Fold (zext (and/or/xor (shl/shr (load x), cst), cst))
Guozhi Wei via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 22 14:47:25 PDT 2018
Author: carrot
Date: Thu Mar 22 14:47:25 2018
New Revision: 328252
URL: http://llvm.org/viewvc/llvm-project?rev=328252&view=rev
Log:
[DAGCombiner] Fold (zext (and/or/xor (shl/shr (load x), cst), cst))
In our real world application, we found the following optimization is missed in DAGCombiner
(zext (and/or/xor (shl/shr (load x), cst), cst)) -> (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
If the user of the original zext is an add, this may enable further LEA optimization on x86.
This patch adds a new function, CombineZExtLogicopShiftLoad, to perform this optimization.
Differential Revision: https://reviews.llvm.org/D44402
Added:
llvm/trunk/test/CodeGen/X86/zext-logicop-shift-load.ll
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=328252&r1=328251&r2=328252&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Thu Mar 22 14:47:25 2018
@@ -426,6 +426,7 @@ namespace {
unsigned HiOp);
SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
SDValue CombineExtLoad(SDNode *N);
+ SDValue CombineZExtLogicopShiftLoad(SDNode *N);
SDValue combineRepeatedFPDivisors(SDNode *N);
SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
@@ -7470,6 +7471,80 @@ SDValue DAGCombiner::CombineExtLoad(SDNo
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
+// fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
+// (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
+//
+// Widening the logic op and the shift into the zext'd type lets the narrow
+// load become a zero-extending load, and the widened result can feed address
+// computations (e.g. LEA on x86). Returns the replacement value, or an empty
+// SDValue if the pattern does not match or the widened ops are not legal.
+SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
+ assert(N->getOpcode() == ISD::ZERO_EXTEND);
+ EVT VT = N->getValueType(0);
+
+ // Match the logic op (and/or/xor) with a constant RHS; it must be legal in
+ // the wide type VT if we are past operation legalization.
+ SDValue N0 = N->getOperand(0);
+ if (!(N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
+ N0.getOpcode() == ISD::XOR) ||
+ N0.getOperand(1).getOpcode() != ISD::Constant ||
+ (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
+ return SDValue();
+
+ // Match the shift (shl/srl) with a constant amount, likewise legal in VT.
+ SDValue N1 = N0->getOperand(0);
+ if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
+ N1.getOperand(1).getOpcode() != ISD::Constant ||
+ (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
+ return SDValue();
+
+ // Match the load feeding the shift; it must be convertible to a zextload
+ // (so not already sign-extending, and not indexed).
+ auto *Load = dyn_cast<LoadSDNode>(N1.getOperand(0));
+ if (!Load)
+ return SDValue();
+ EVT MemVT = Load->getMemoryVT();
+ if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
+ Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
+ return SDValue();
+
+ // If the shift op is SHL, the logic op must be AND, otherwise the result
+ // will be wrong: only an AND with the zero-extended mask clears the bits
+ // that the wider SHL keeps but the original narrow SHL would have
+ // shifted out of the value.
+ if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
+ return SDValue();
+
+ // The shift and logic-op results must have no other users, or the narrow
+ // computation would have to be kept alive alongside the widened one.
+ if (!N0.hasOneUse() || !N1.hasOneUse())
+ return SDValue();
+
+ SmallVector<SDNode*, 4> SetCCs;
+ if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
+ ISD::ZERO_EXTEND, SetCCs, TLI))
+ return SDValue();
+
+ // Actually do the transformation.
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
+ Load->getChain(), Load->getBasePtr(),
+ Load->getMemoryVT(), Load->getMemOperand());
+
+ // Widen the shift amount to VT and rebuild the shift on the ext load.
+ APInt ShiftCst = cast<ConstantSDNode>(N1.getOperand(1))->getAPIntValue();
+ ShiftCst = ShiftCst.zextOrSelf(VT.getSizeInBits());
+ SDLoc DL1(N1);
+ SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
+ DAG.getConstant(ShiftCst, DL1, VT));
+
+ // Widen the logic-op constant to VT and rebuild the logic op on the shift.
+ APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ Mask = Mask.zext(VT.getSizeInBits());
+ SDLoc DL0(N0);
+ SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
+ DAG.getConstant(Mask, DL0, VT));
+
+ ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, SDLoc(Load),
+ ISD::ZERO_EXTEND);
+ CombineTo(N, And);
+ if (SDValue(Load, 0).hasOneUse()) {
+ // Only the widened chain used the load's value; forward the chain output
+ // of the old load directly to the new extending load's chain.
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
+ } else {
+ // Other users still need the narrow value; hand them a truncate of the
+ // extending load instead of the original load.
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
+ Load->getValueType(0), ExtLoad);
+ CombineTo(Load, Trunc, ExtLoad.getValue(1));
+ }
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+}
+
/// If we're narrowing or widening the result of a vector select and the final
/// size is the same size as a setcc (compare) feeding the select, then try to
/// apply the cast operation to the select's operands because matching vector
@@ -7988,6 +8063,11 @@ SDValue DAGCombiner::visitZERO_EXTEND(SD
}
}
+ // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
+ // (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
+ if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
+ return ZExtLoad;
+
// fold (zext (zextload x)) -> (zext (truncate (zextload x)))
// fold (zext ( extload x)) -> (zext (truncate (zextload x)))
if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
Added: llvm/trunk/test/CodeGen/X86/zext-logicop-shift-load.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/zext-logicop-shift-load.ll?rev=328252&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/zext-logicop-shift-load.ll (added)
+++ llvm/trunk/test/CodeGen/X86/zext-logicop-shift-load.ll Thu Mar 22 14:47:25 2018
@@ -0,0 +1,122 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
+
+
+; (zext (and (shl (load x), 2), 60)) is widened to i64: the i8 load becomes
+; a zero-extending movzbl and the shift/mask run on the wide register.
+define i64 @test1(i8* %data) {
+; CHECK-LABEL: test1:
+; CHECK: movzbl
+; CHECK-NEXT: shlq
+; CHECK-NEXT: andl
+; CHECK-NEXT: retq
+entry:
+ %bf.load = load i8, i8* %data, align 4
+ %bf.clear = shl i8 %bf.load, 2
+ %0 = and i8 %bf.clear, 60
+ %mul = zext i8 %0 to i64
+ ret i64 %mul
+}
+
+; Same pattern as above, but the zext feeds a GEP, so the widened value can
+; be consumed directly by leaq for the address computation.
+define i8* @test2(i8* %data) {
+; CHECK-LABEL: test2:
+; CHECK: movzbl
+; CHECK-NEXT: andl
+; CHECK-NEXT: leaq
+; CHECK-NEXT: retq
+entry:
+ %bf.load = load i8, i8* %data, align 4
+ %bf.clear = shl i8 %bf.load, 2
+ %0 = and i8 %bf.clear, 60
+ %mul = zext i8 %0 to i64
+ %add.ptr = getelementptr inbounds i8, i8* %data, i64 %mul
+ ret i8* %add.ptr
+}
+
+; If the shift op is SHL, the logic op can only be AND.
+; Here it is XOR, so no widening happens: the shl/xor stay in i8 and the
+; zext remains a separate movzbl at the end.
+define i64 @test3(i8* %data) {
+; CHECK-LABEL: test3:
+; CHECK: movb
+; CHECK-NEXT: shlb
+; CHECK-NEXT: xorb
+; CHECK-NEXT: movzbl
+; CHECK-NEXT: retq
+entry:
+ %bf.load = load i8, i8* %data, align 4
+ %bf.clear = shl i8 %bf.load, 2
+ %0 = xor i8 %bf.clear, 60
+ %mul = zext i8 %0 to i64
+ ret i64 %mul
+}
+
+; lshr+and is widened: the i8 load zero-extends (movzbl) and the shift and
+; mask execute on the 64-bit value.
+define i64 @test4(i8* %data) {
+; CHECK-LABEL: test4:
+; CHECK: movzbl
+; CHECK-NEXT: shrq
+; CHECK-NEXT: andl
+; CHECK-NEXT: retq
+entry:
+ %bf.load = load i8, i8* %data, align 4
+ %bf.clear = lshr i8 %bf.load, 2
+ %0 = and i8 %bf.clear, 60
+ %1 = zext i8 %0 to i64
+ ret i64 %1
+}
+
+; With a right shift any logic op may be widened (the SHL-requires-AND
+; restriction does not apply), so lshr+xor is folded too.
+define i64 @test5(i8* %data) {
+; CHECK-LABEL: test5:
+; CHECK: movzbl
+; CHECK-NEXT: shrq
+; CHECK-NEXT: xorq
+; CHECK-NEXT: retq
+entry:
+ %bf.load = load i8, i8* %data, align 4
+ %bf.clear = lshr i8 %bf.load, 2
+ %0 = xor i8 %bf.clear, 60
+ %1 = zext i8 %0 to i64
+ ret i64 %1
+}
+
+; lshr+or is likewise widened to 64-bit operations on the zero-extended load.
+define i64 @test6(i8* %data) {
+; CHECK-LABEL: test6:
+; CHECK: movzbl
+; CHECK-NEXT: shrq
+; CHECK-NEXT: orq
+; CHECK-NEXT: retq
+entry:
+ %bf.load = load i8, i8* %data, align 4
+ %bf.clear = lshr i8 %bf.load, 2
+ %0 = or i8 %bf.clear, 60
+ %1 = zext i8 %0 to i64
+ ret i64 %1
+}
+
+; Don't do the folding if the other operand isn't a constant.
+; The shr/or stay in i8 and the zext is a trailing movzbl.
+define i64 @test7(i8* %data, i8 %logop) {
+; CHECK-LABEL: test7:
+; CHECK: movb
+; CHECK-NEXT: shrb
+; CHECK-NEXT: orb
+; CHECK-NEXT: movzbl
+; CHECK-NEXT: retq
+entry:
+ %bf.load = load i8, i8* %data, align 4
+ %bf.clear = lshr i8 %bf.load, 2
+ %0 = or i8 %bf.clear, %logop
+ %1 = zext i8 %0 to i64
+ ret i64 %1
+}
+
+; Load is folded with sext.
+; The load is already consumed as a sign-extending load (movsbl), so the
+; combine must not turn it into a zextload; the zext is applied afterwards.
+define i64 @test8(i8* %data) {
+; CHECK-LABEL: test8:
+; CHECK: movsbl
+; CHECK-NEXT: movzwl
+; CHECK-NEXT: shrl
+; CHECK-NEXT: orl
+entry:
+ %bf.load = load i8, i8* %data, align 4
+ %ext = sext i8 %bf.load to i16
+ %bf.clear = lshr i16 %ext, 2
+ %0 = or i16 %bf.clear, 60
+ %1 = zext i16 %0 to i64
+ ret i64 %1
+}
+
More information about the llvm-commits
mailing list