[llvm] r328252 - [DAGCombiner] Fold (zext (and/or/xor (shl/shr (load x), cst), cst))
Carrot Wei via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 22 16:30:43 PDT 2018
It causes a failure in
http://lab.llvm.org:8011/builders/sanitizer-x86_64-linux-android/builds/8921.
The problem seems to be at line 7530:

    Mask = Mask.zext(VT.getSizeInBits());

The correct code should be:

    Mask = Mask.zextOrSelf(VT.getSizeInBits());

I am trying to reproduce it locally.
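
For reference, a minimal sketch of the difference between the two calls
(assuming the APInt semantics of this release; the function name and the
values are hypothetical):

    #include "llvm/ADT/APInt.h"
    using namespace llvm;

    void zextVsZextOrSelf() {
      // An 8-bit constant, like the logic op's mask operand above.
      APInt Mask(/*numBits=*/8, /*val=*/60);

      // APInt::zext asserts that the new width is strictly greater than
      // the current width, so it crashes when the two widths are equal:
      //   Mask = Mask.zext(8);    // assertion failure

      // APInt::zextOrSelf returns the value unchanged when the width does
      // not grow and zero-extends otherwise, so it is safe in both cases:
      Mask = Mask.zextOrSelf(8);   // no-op
      Mask = Mask.zextOrSelf(16);  // zero-extends to 16 bits
    }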
On Thu, Mar 22, 2018 at 2:47 PM, Guozhi Wei via llvm-commits
<llvm-commits at lists.llvm.org> wrote:
> Author: carrot
> Date: Thu Mar 22 14:47:25 2018
> New Revision: 328252
>
> URL: http://llvm.org/viewvc/llvm-project?rev=328252&view=rev
> Log:
> [DAGCombiner] Fold (zext (and/or/xor (shl/shr (load x), cst), cst))
>
> In our real-world application, we found that the following optimization is missed by the DAGCombiner:
>
> (zext (and/or/xor (shl/shr (load x), cst), cst)) -> (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
>
> If the user of the original zext is an add, this may enable further lea optimization on x86, as in the example below.
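>
> For example (an illustrative instance of the fold above), a DAG like
>
>   (add x, (zext (and (shl (load p), 2), 60)))
>
> becomes
>
>   (add x, (and (shl (zextload p), 2), 60))
>
> where the add of the already zero-extended value can be selected as an lea.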
>
> This patch adds a new function, CombineZExtLogicopShiftLoad, to do this optimization.
>
> Differential Revision: https://reviews.llvm.org/D44402
>
>
> Added:
> llvm/trunk/test/CodeGen/X86/zext-logicop-shift-load.ll
> Modified:
> llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
>
> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=328252&r1=328251&r2=328252&view=diff
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
> +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Thu Mar 22 14:47:25 2018
> @@ -426,6 +426,7 @@ namespace {
> unsigned HiOp);
> SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
> SDValue CombineExtLoad(SDNode *N);
> + SDValue CombineZExtLogicopShiftLoad(SDNode *N);
> SDValue combineRepeatedFPDivisors(SDNode *N);
> SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
> SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
> @@ -7470,6 +7471,80 @@ SDValue DAGCombiner::CombineExtLoad(SDNo
> return SDValue(N, 0); // Return N so it doesn't get rechecked!
> }
>
> +// fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
> +// (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
> +SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
> + assert(N->getOpcode() == ISD::ZERO_EXTEND);
> + EVT VT = N->getValueType(0);
> +
> + // and/or/xor
> + SDValue N0 = N->getOperand(0);
> + if (!(N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
> + N0.getOpcode() == ISD::XOR) ||
> + N0.getOperand(1).getOpcode() != ISD::Constant ||
> + (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
> + return SDValue();
> +
> + // shl/shr
> + SDValue N1 = N0->getOperand(0);
> + if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
> + N1.getOperand(1).getOpcode() != ISD::Constant ||
> + (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
> + return SDValue();
> +
> + // load
> + if (!isa<LoadSDNode>(N1.getOperand(0)))
> + return SDValue();
> + LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
> + EVT MemVT = Load->getMemoryVT();
> + if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
> + Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
> + return SDValue();
> +
> + // If the shift op is SHL, the logic op must be AND, otherwise the result
> + // will be wrong.
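> + // (With a wider SHL, high bits that the narrow shl would have shifted
> + // out stay live; an AND with the zero-extended mask still clears them,
> + // but OR/XOR would let them leak into the result.)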
> + if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
> + return SDValue();
> +
> + if (!N0.hasOneUse() || !N1.hasOneUse())
> + return SDValue();
> +
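> + // Check that the load's other uses are compatible with forming an
> + // extending load; SETCC users are collected here and rewritten via
> + // ExtendSetCCUses below.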
> + SmallVector<SDNode*, 4> SetCCs;
> + if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
> + ISD::ZERO_EXTEND, SetCCs, TLI))
> + return SDValue();
> +
> + // Actually do the transformation.
> + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
> + Load->getChain(), Load->getBasePtr(),
> + Load->getMemoryVT(), Load->getMemOperand());
> +
> + APInt ShiftCst = cast<ConstantSDNode>(N1.getOperand(1))->getAPIntValue();
> + ShiftCst = ShiftCst.zextOrSelf(VT.getSizeInBits());
> + SDLoc DL1(N1);
> + SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
> + DAG.getConstant(ShiftCst, DL1, VT));
> +
> + APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
> + Mask = Mask.zext(VT.getSizeInBits());
> + SDLoc DL0(N0);
> + SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
> + DAG.getConstant(Mask, DL0, VT));
> +
> + ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, SDLoc(Load),
> + ISD::ZERO_EXTEND);
> + CombineTo(N, And);
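> + // If the (now dead) shift was the load's only user, only the load's
> + // chain result needs rewiring; otherwise give the remaining users a
> + // truncate of the extended value.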
> + if (SDValue(Load, 0).hasOneUse()) {
> + DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
> + } else {
> + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
> + Load->getValueType(0), ExtLoad);
> + CombineTo(Load, Trunc, ExtLoad.getValue(1));
> + }
> + return SDValue(N, 0); // Return N so it doesn't get rechecked!
> +}
> +
> /// If we're narrowing or widening the result of a vector select and the final
> /// size is the same size as a setcc (compare) feeding the select, then try to
> /// apply the cast operation to the select's operands because matching vector
> @@ -7988,6 +8063,11 @@ SDValue DAGCombiner::visitZERO_EXTEND(SD
> }
> }
>
> + // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
> + // (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
> + if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
> + return ZExtLoad;
> +
> // fold (zext (zextload x)) -> (zext (truncate (zextload x)))
> // fold (zext ( extload x)) -> (zext (truncate (zextload x)))
> if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
>
> Added: llvm/trunk/test/CodeGen/X86/zext-logicop-shift-load.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/zext-logicop-shift-load.ll?rev=328252&view=auto
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/zext-logicop-shift-load.ll (added)
> +++ llvm/trunk/test/CodeGen/X86/zext-logicop-shift-load.ll Thu Mar 22 14:47:25 2018
> @@ -0,0 +1,122 @@
> +; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
> +
> +
> +define i64 @test1(i8* %data) {
> +; CHECK-LABEL: test1:
> +; CHECK: movzbl
> +; CHECK-NEXT: shlq
> +; CHECK-NEXT: andl
> +; CHECK-NEXT: retq
> +entry:
> + %bf.load = load i8, i8* %data, align 4
> + %bf.clear = shl i8 %bf.load, 2
> + %0 = and i8 %bf.clear, 60
> + %mul = zext i8 %0 to i64
> + ret i64 %mul
> +}
> +
> +define i8* @test2(i8* %data) {
> +; CHECK-LABEL: test2:
> +; CHECK: movzbl
> +; CHECK-NEXT: andl
> +; CHECK-NEXT: leaq
> +; CHECK-NEXT: retq
> +entry:
> + %bf.load = load i8, i8* %data, align 4
> + %bf.clear = shl i8 %bf.load, 2
> + %0 = and i8 %bf.clear, 60
> + %mul = zext i8 %0 to i64
> + %add.ptr = getelementptr inbounds i8, i8* %data, i64 %mul
> + ret i8* %add.ptr
> +}
> +
> +; If the shift op is SHL, the logic op can only be AND.
> +define i64 @test3(i8* %data) {
> +; CHECK-LABEL: test3:
> +; CHECK: movb
> +; CHECK-NEXT: shlb
> +; CHECK-NEXT: xorb
> +; CHECK-NEXT: movzbl
> +; CHECK-NEXT: retq
> +entry:
> + %bf.load = load i8, i8* %data, align 4
> + %bf.clear = shl i8 %bf.load, 2
> + %0 = xor i8 %bf.clear, 60
> + %mul = zext i8 %0 to i64
> + ret i64 %mul
> +}
> +
> +define i64 @test4(i8* %data) {
> +; CHECK-LABEL: test4:
> +; CHECK: movzbl
> +; CHECK-NEXT: shrq
> +; CHECK-NEXT: andl
> +; CHECK-NEXT: retq
> +entry:
> + %bf.load = load i8, i8* %data, align 4
> + %bf.clear = lshr i8 %bf.load, 2
> + %0 = and i8 %bf.clear, 60
> + %1 = zext i8 %0 to i64
> + ret i64 %1
> +}
> +
> +define i64 @test5(i8* %data) {
> +; CHECK-LABEL: test5:
> +; CHECK: movzbl
> +; CHECK-NEXT: shrq
> +; CHECK-NEXT: xorq
> +; CHECK-NEXT: retq
> +entry:
> + %bf.load = load i8, i8* %data, align 4
> + %bf.clear = lshr i8 %bf.load, 2
> + %0 = xor i8 %bf.clear, 60
> + %1 = zext i8 %0 to i64
> + ret i64 %1
> +}
> +
> +define i64 @test6(i8* %data) {
> +; CHECK-LABEL: test6:
> +; CHECK: movzbl
> +; CHECK-NEXT: shrq
> +; CHECK-NEXT: orq
> +; CHECK-NEXT: retq
> +entry:
> + %bf.load = load i8, i8* %data, align 4
> + %bf.clear = lshr i8 %bf.load, 2
> + %0 = or i8 %bf.clear, 60
> + %1 = zext i8 %0 to i64
> + ret i64 %1
> +}
> +
> +; Don't do the folding if the other operand isn't a constant.
> +define i64 @test7(i8* %data, i8 %logop) {
> +; CHECK-LABEL: test7:
> +; CHECK: movb
> +; CHECK-NEXT: shrb
> +; CHECK-NEXT: orb
> +; CHECK-NEXT: movzbl
> +; CHECK-NEXT: retq
> +entry:
> + %bf.load = load i8, i8* %data, align 4
> + %bf.clear = lshr i8 %bf.load, 2
> + %0 = or i8 %bf.clear, %logop
> + %1 = zext i8 %0 to i64
> + ret i64 %1
> +}
> +
> +; Load is folded with sext.
> +define i64 @test8(i8* %data) {
> +; CHECK-LABEL: test8:
> +; CHECK: movsbl
> +; CHECK-NEXT: movzwl
> +; CHECK-NEXT: shrl
> +; CHECK-NEXT: orl
> +entry:
> + %bf.load = load i8, i8* %data, align 4
> + %ext = sext i8 %bf.load to i16
> + %bf.clear = lshr i16 %ext, 2
> + %0 = or i16 %bf.clear, 60
> + %1 = zext i16 %0 to i64
> + ret i64 %1
> +}
> +