[llvm] r328252 - [DAGCombiner] Fold (zext (and/or/xor (shl/shr (load x), cst), cst))

Carrot Wei via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 22 16:30:43 PDT 2018


This causes a failure in
http://lab.llvm.org:8011/builders/sanitizer-x86_64-linux-android/builds/8921;
the problem appears to be

7530   Mask = Mask.zext(VT.getSizeInBits());

the correct code should be

7530   Mask = Mask.zextOrSelf(VT.getSizeInBits());

Trying to reproduce it locally.
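
For context: APInt::zext() asserts that the requested width is strictly
greater than the current bit width, while APInt::zextOrSelf() also accepts
an equal width and returns the value unchanged. A minimal sketch of the
difference (the helper and its values are made up for illustration):

  #include "llvm/ADT/APInt.h"
  using llvm::APInt;

  void zextVsZextOrSelf() {
    APInt Mask(8, 60);             // an 8-bit constant
    APInt A = Mask.zextOrSelf(64); // OK: zero extends to 64 bits
    APInt B = Mask.zextOrSelf(8);  // OK: same width, returned unchanged
    // APInt C = Mask.zext(8);     // would assert: needs a strictly larger width
  }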


On Thu, Mar 22, 2018 at 2:47 PM, Guozhi Wei via llvm-commits
<llvm-commits at lists.llvm.org> wrote:
> Author: carrot
> Date: Thu Mar 22 14:47:25 2018
> New Revision: 328252
>
> URL: http://llvm.org/viewvc/llvm-project?rev=328252&view=rev
> Log:
> [DAGCombiner] Fold (zext (and/or/xor (shl/shr (load x), cst), cst))
>
> In our real-world application, we found that the following optimization is missed in DAGCombiner:
>
> (zext (and/or/xor (shl/shr (load x), cst), cst)) -> (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
>
> If the user of the original zext is an add, it may enable further lea optimization on x86.
>
> This patch adds a new function CombineZExtLogicopShiftLoad to do this optimization.
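>
> For example (this is test1 in the added test file), for an i8 load whose
> shifted and masked value is zero extended to i64:
>
>   (zext (and (shl (load x), 2), 60))
>     -> (and (shl (zextload x), 2), 60)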
>
> Differential Revision: https://reviews.llvm.org/D44402
>
>
> Added:
>     llvm/trunk/test/CodeGen/X86/zext-logicop-shift-load.ll
> Modified:
>     llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
>
> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=328252&r1=328251&r2=328252&view=diff
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
> +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Thu Mar 22 14:47:25 2018
> @@ -426,6 +426,7 @@ namespace {
>                                           unsigned HiOp);
>      SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
>      SDValue CombineExtLoad(SDNode *N);
> +    SDValue CombineZExtLogicopShiftLoad(SDNode *N);
>      SDValue combineRepeatedFPDivisors(SDNode *N);
>      SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
>      SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
> @@ -7470,6 +7471,80 @@ SDValue DAGCombiner::CombineExtLoad(SDNo
>    return SDValue(N, 0); // Return N so it doesn't get rechecked!
>  }
>
> +// fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
> +//      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
> +SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
> +  assert(N->getOpcode() == ISD::ZERO_EXTEND);
> +  EVT VT = N->getValueType(0);
> +
> +  // and/or/xor
> +  SDValue N0 = N->getOperand(0);
> +  if (!(N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
> +        N0.getOpcode() == ISD::XOR) ||
> +      N0.getOperand(1).getOpcode() != ISD::Constant ||
> +      (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
> +    return SDValue();
> +
> +  // shl/shr
> +  SDValue N1 = N0->getOperand(0);
> +  if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
> +      N1.getOperand(1).getOpcode() != ISD::Constant ||
> +      (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
> +    return SDValue();
> +
> +  // load
> +  if (!isa<LoadSDNode>(N1.getOperand(0)))
> +    return SDValue();
> +  LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
> +  EVT MemVT = Load->getMemoryVT();
> +  if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
> +      Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
> +    return SDValue();
> +
> +  // If the shift op is SHL, the logic op must be AND, otherwise the result
> +  // will be wrong.
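> +  // E.g. for an i8 value zero extended to i64, (zext (or (shl x, 2), c))
> +  // keeps only the low 8 bits of the shift, but the transformed
> +  // (or (shl (zextload x), 2), (zext c)) also keeps bits 8 and 9 shifted
> +  // in from x. Only an AND can clear those bits, since the zero extended
> +  // mask has all high bits clear.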
> +  if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
> +    return SDValue();
> +
> +  if (!N0.hasOneUse() || !N1.hasOneUse())
> +    return SDValue();
> +
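> +  // The fold is only safe if every other use of the loaded value can be
> +  // rewritten to use the extended value; SETCC users that need updating
> +  // are collected in SetCCs.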
> +  SmallVector<SDNode*, 4> SetCCs;
> +  if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
> +                               ISD::ZERO_EXTEND, SetCCs, TLI))
> +    return SDValue();
> +
> +  // Actually do the transformation.
> +  SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
> +                                   Load->getChain(), Load->getBasePtr(),
> +                                   Load->getMemoryVT(), Load->getMemOperand());
> +
> +  APInt ShiftCst = cast<ConstantSDNode>(N1.getOperand(1))->getAPIntValue();
> +  ShiftCst = ShiftCst.zextOrSelf(VT.getSizeInBits());
> +  SDLoc DL1(N1);
> +  SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
> +                              DAG.getConstant(ShiftCst, DL1, VT));
> +
> +  APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
> +  Mask = Mask.zext(VT.getSizeInBits());
> +  SDLoc DL0(N0);
> +  SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
> +                            DAG.getConstant(Mask, DL0, VT));
> +
> +  ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, SDLoc(Load),
> +                  ISD::ZERO_EXTEND);
> +  CombineTo(N, And);
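> +  // N has been replaced; now fix up the old load. If the shift was its
> +  // only user, all uses of the loaded value are gone and only its chain
> +  // output needs replacing; otherwise keep the remaining users alive with
> +  // a truncate of the extended load.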
> +  if (SDValue(Load, 0).hasOneUse()) {
> +    DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
> +  } else {
> +    SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
> +                                Load->getValueType(0), ExtLoad);
> +    CombineTo(Load, Trunc, ExtLoad.getValue(1));
> +  }
> +  return SDValue(N, 0); // Return N so it doesn't get rechecked!
> +}
> +
>  /// If we're narrowing or widening the result of a vector select and the final
>  /// size is the same size as a setcc (compare) feeding the select, then try to
>  /// apply the cast operation to the select's operands because matching vector
> @@ -7988,6 +8063,11 @@ SDValue DAGCombiner::visitZERO_EXTEND(SD
>      }
>    }
>
> +  // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
> +  //      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
> +  if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
> +    return ZExtLoad;
> +
>    // fold (zext (zextload x)) -> (zext (truncate (zextload x)))
>    // fold (zext ( extload x)) -> (zext (truncate (zextload x)))
>    if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
>
> Added: llvm/trunk/test/CodeGen/X86/zext-logicop-shift-load.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/zext-logicop-shift-load.ll?rev=328252&view=auto
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/zext-logicop-shift-load.ll (added)
> +++ llvm/trunk/test/CodeGen/X86/zext-logicop-shift-load.ll Thu Mar 22 14:47:25 2018
> @@ -0,0 +1,122 @@
> +; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
> +
> +
> +define i64 @test1(i8* %data) {
> +; CHECK-LABEL: test1:
> +; CHECK:       movzbl
> +; CHECK-NEXT:  shlq
> +; CHECK-NEXT:  andl
> +; CHECK-NEXT:  retq
> +entry:
> +  %bf.load = load i8, i8* %data, align 4
> +  %bf.clear = shl i8 %bf.load, 2
> +  %0 = and i8 %bf.clear, 60
> +  %mul = zext i8 %0 to i64
> +  ret i64 %mul
> +}
> +
> +define i8* @test2(i8* %data) {
> +; CHECK-LABEL: test2:
> +; CHECK:       movzbl
> +; CHECK-NEXT:  andl
> +; CHECK-NEXT:  leaq
> +; CHECK-NEXT:  retq
> +entry:
> +  %bf.load = load i8, i8* %data, align 4
> +  %bf.clear = shl i8 %bf.load, 2
> +  %0 = and i8 %bf.clear, 60
> +  %mul = zext i8 %0 to i64
> +  %add.ptr = getelementptr inbounds i8, i8* %data, i64 %mul
> +  ret i8* %add.ptr
> +}
> +
> +; If the shift op is SHL, the logic op can only be AND.
> +define i64 @test3(i8* %data) {
> +; CHECK-LABEL: test3:
> +; CHECK:       movb
> +; CHECK-NEXT:  shlb
> +; CHECK-NEXT:  xorb
> +; CHECK-NEXT:  movzbl
> +; CHECK-NEXT:  retq
> +entry:
> +  %bf.load = load i8, i8* %data, align 4
> +  %bf.clear = shl i8 %bf.load, 2
> +  %0 = xor i8 %bf.clear, 60
> +  %mul = zext i8 %0 to i64
> +  ret i64 %mul
> +}
> +
> +define i64 @test4(i8* %data) {
> +; CHECK-LABEL: test4:
> +; CHECK:       movzbl
> +; CHECK-NEXT:  shrq
> +; CHECK-NEXT:  andl
> +; CHECK-NEXT:  retq
> +entry:
> +  %bf.load = load i8, i8* %data, align 4
> +  %bf.clear = lshr i8 %bf.load, 2
> +  %0 = and i8 %bf.clear, 60
> +  %1 = zext i8 %0 to i64
> +  ret i64 %1
> +}
> +
> +define i64 @test5(i8* %data) {
> +; CHECK-LABEL: test5:
> +; CHECK:       movzbl
> +; CHECK-NEXT:  shrq
> +; CHECK-NEXT:  xorq
> +; CHECK-NEXT:  retq
> +entry:
> +  %bf.load = load i8, i8* %data, align 4
> +  %bf.clear = lshr i8 %bf.load, 2
> +  %0 = xor i8 %bf.clear, 60
> +  %1 = zext i8 %0 to i64
> +  ret i64 %1
> +}
> +
> +define i64 @test6(i8* %data) {
> +; CHECK-LABEL: test6:
> +; CHECK:       movzbl
> +; CHECK-NEXT:  shrq
> +; CHECK-NEXT:  orq
> +; CHECK-NEXT:  retq
> +entry:
> +  %bf.load = load i8, i8* %data, align 4
> +  %bf.clear = lshr i8 %bf.load, 2
> +  %0 = or i8 %bf.clear, 60
> +  %1 = zext i8 %0 to i64
> +  ret i64 %1
> +}
> +
> +; Don't do the folding if the other operand isn't a constant.
> +define i64 @test7(i8* %data, i8 %logop) {
> +; CHECK-LABEL: test7:
> +; CHECK:       movb
> +; CHECK-NEXT:  shrb
> +; CHECK-NEXT:  orb
> +; CHECK-NEXT:  movzbl
> +; CHECK-NEXT:  retq
> +entry:
> +  %bf.load = load i8, i8* %data, align 4
> +  %bf.clear = lshr i8 %bf.load, 2
> +  %0 = or i8 %bf.clear, %logop
> +  %1 = zext i8 %0 to i64
> +  ret i64 %1
> +}
> +
> +; Load is folded with sext.
> +define i64 @test8(i8* %data) {
> +; CHECK-LABEL: test8:
> +; CHECK:       movsbl
> +; CHECK-NEXT:  movzwl
> +; CHECK-NEXT:  shrl
> +; CHECK-NEXT:  orl
> +entry:
> +  %bf.load = load i8, i8* %data, align 4
> +  %ext = sext i8 %bf.load to i16
> +  %bf.clear = lshr i16 %ext, 2
> +  %0 = or i16 %bf.clear, 60
> +  %1 = zext i16 %0 to i64
> +  ret i64 %1
> +}
> +
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits

