[llvm] r328252 - [DAGCombiner] Fold (zext (and/or/xor (shl/shr (load x), cst), cst))
Guozhi Wei via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 22 14:47:25 PDT 2018
Author: carrot
Date: Thu Mar 22 14:47:25 2018
New Revision: 328252
URL: http://llvm.org/viewvc/llvm-project?rev=328252&view=rev
Log:
[DAGCombiner] Fold (zext (and/or/xor (shl/shr (load x), cst), cst))
In our real world application, we found the following optimization is missed in DAGCombiner
(zext (and/or/xor (shl/shr (load x), cst), cst)) -> (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
If the user of the original zext is an add, this may enable further LEA optimization on x86.
This patch adds a new function, CombineZExtLogicopShiftLoad, to perform this optimization.
Differential Revision: https://reviews.llvm.org/D44402
Added:
llvm/trunk/test/CodeGen/X86/zext-logicop-shift-load.ll
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=328252&r1=328251&r2=328252&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Thu Mar 22 14:47:25 2018
@@ -426,6 +426,7 @@ namespace {
unsigned HiOp);
SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
SDValue CombineExtLoad(SDNode *N);
+ SDValue CombineZExtLogicopShiftLoad(SDNode *N);
SDValue combineRepeatedFPDivisors(SDNode *N);
SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
@@ -7470,6 +7471,80 @@ SDValue DAGCombiner::CombineExtLoad(SDNo
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
+// fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
+// (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
+//
+// Widening the logic op and the shift into the zext'd type lets the narrow
+// load become a zero-extending load, and the widened result can feed address
+// computations (e.g. LEA on x86). Returns the replacement value, or an empty
+// SDValue if the pattern does not match or the widened ops are not legal.
+SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
+ assert(N->getOpcode() == ISD::ZERO_EXTEND);
+ EVT VT = N->getValueType(0);
+
+ // Match the logic op (and/or/xor) with a constant RHS; it must be legal in
+ // the wide type VT if we are past operation legalization.
+ SDValue N0 = N->getOperand(0);
+ if (!(N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
+ N0.getOpcode() == ISD::XOR) ||
+ N0.getOperand(1).getOpcode() != ISD::Constant ||
+ (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
+ return SDValue();
+
+ // Match the shift (shl/srl) with a constant amount, likewise legal in VT.
+ SDValue N1 = N0->getOperand(0);
+ if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
+ N1.getOperand(1).getOpcode() != ISD::Constant ||
+ (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
+ return SDValue();
+
+ // Match the load feeding the shift; it must be convertible to a zextload
+ // (so not already sign-extending, and not indexed).
+ auto *Load = dyn_cast<LoadSDNode>(N1.getOperand(0));
+ if (!Load)
+ return SDValue();
+ EVT MemVT = Load->getMemoryVT();
+ if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
+ Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
+ return SDValue();
+
+ // If the shift op is SHL, the logic op must be AND, otherwise the result
+ // will be wrong: only an AND with the zero-extended mask clears the bits
+ // that the wider SHL keeps but the original narrow SHL would have
+ // shifted out of the value.
+ if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
+ return SDValue();
+
+ // The shift and logic-op results must have no other users, or the narrow
+ // computation would have to be kept alive alongside the widened one.
+ if (!N0.hasOneUse() || !N1.hasOneUse())
+ return SDValue();
+
+ SmallVector<SDNode*, 4> SetCCs;
+ if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
+ ISD::ZERO_EXTEND, SetCCs, TLI))
+ return SDValue();
+
+ // Actually do the transformation.
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
+ Load->getChain(), Load->getBasePtr(),
+ Load->getMemoryVT(), Load->getMemOperand());
+
+ // Widen the shift amount to VT and rebuild the shift on the ext load.
+ APInt ShiftCst = cast<ConstantSDNode>(N1.getOperand(1))->getAPIntValue();
+ ShiftCst = ShiftCst.zextOrSelf(VT.getSizeInBits());
+ SDLoc DL1(N1);
+ SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
+ DAG.getConstant(ShiftCst, DL1, VT));
+
+ // Widen the logic-op constant to VT and rebuild the logic op on the shift.
+ APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ Mask = Mask.zext(VT.getSizeInBits());
+ SDLoc DL0(N0);
+ SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
+ DAG.getConstant(Mask, DL0, VT));
+
+ ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, SDLoc(Load),
+ ISD::ZERO_EXTEND);
+ CombineTo(N, And);
+ if (SDValue(Load, 0).hasOneUse()) {
+ // Only the widened chain used the load's value; forward the chain output
+ // of the old load directly to the new extending load's chain.
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
+ } else {
+ // Other users still need the narrow value; hand them a truncate of the
+ // extending load instead of the original load.
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
+ Load->getValueType(0), ExtLoad);
+ CombineTo(Load, Trunc, ExtLoad.getValue(1));
+ }
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+}
+
/// If we're narrowing or widening the result of a vector select and the final
/// size is the same size as a setcc (compare) feeding the select, then try to
/// apply the cast operation to the select's operands because matching vector
@@ -7988,6 +8063,11 @@ SDValue DAGCombiner::visitZERO_EXTEND(SD
}
}
+ // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
+ // (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
+ if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
+ return ZExtLoad;
+
// fold (zext (zextload x)) -> (zext (truncate (zextload x)))
// fold (zext ( extload x)) -> (zext (truncate (zextload x)))
if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
Added: llvm/trunk/test/CodeGen/X86/zext-logicop-shift-load.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/zext-logicop-shift-load.ll?rev=328252&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/zext-logicop-shift-load.ll (added)
+++ llvm/trunk/test/CodeGen/X86/zext-logicop-shift-load.ll Thu Mar 22 14:47:25 2018
@@ -0,0 +1,122 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
+
+
+; (zext (and (shl (load x), 2), 60)) is widened to i64: the i8 load becomes
+; a zero-extending movzbl and the shift/mask run on the wide register.
+define i64 @test1(i8* %data) {
+; CHECK-LABEL: test1:
+; CHECK: movzbl
+; CHECK-NEXT: shlq
+; CHECK-NEXT: andl
+; CHECK-NEXT: retq
+entry:
+ %bf.load = load i8, i8* %data, align 4
+ %bf.clear = shl i8 %bf.load, 2
+ %0 = and i8 %bf.clear, 60
+ %mul = zext i8 %0 to i64
+ ret i64 %mul
+}
+
+; Same pattern as above, but the zext feeds a GEP, so the widened value can
+; be consumed directly by leaq for the address computation.
+define i8* @test2(i8* %data) {
+; CHECK-LABEL: test2:
+; CHECK: movzbl
+; CHECK-NEXT: andl
+; CHECK-NEXT: leaq
+; CHECK-NEXT: retq
+entry:
+ %bf.load = load i8, i8* %data, align 4
+ %bf.clear = shl i8 %bf.load, 2
+ %0 = and i8 %bf.clear, 60
+ %mul = zext i8 %0 to i64
+ %add.ptr = getelementptr inbounds i8, i8* %data, i64 %mul
+ ret i8* %add.ptr
+}
+
+; If the shift op is SHL, the logic op can only be AND.
+; Here it is XOR, so no widening happens: the shl/xor stay in i8 and the
+; zext remains a separate movzbl at the end.
+define i64 @test3(i8* %data) {
+; CHECK-LABEL: test3:
+; CHECK: movb
+; CHECK-NEXT: shlb
+; CHECK-NEXT: xorb
+; CHECK-NEXT: movzbl
+; CHECK-NEXT: retq
+entry:
+ %bf.load = load i8, i8* %data, align 4
+ %bf.clear = shl i8 %bf.load, 2
+ %0 = xor i8 %bf.clear, 60
+ %mul = zext i8 %0 to i64
+ ret i64 %mul
+}
+
+; lshr+and is widened: the i8 load zero-extends (movzbl) and the shift and
+; mask execute on the 64-bit value.
+define i64 @test4(i8* %data) {
+; CHECK-LABEL: test4:
+; CHECK: movzbl
+; CHECK-NEXT: shrq
+; CHECK-NEXT: andl
+; CHECK-NEXT: retq
+entry:
+ %bf.load = load i8, i8* %data, align 4
+ %bf.clear = lshr i8 %bf.load, 2
+ %0 = and i8 %bf.clear, 60
+ %1 = zext i8 %0 to i64
+ ret i64 %1
+}
+
+; With a right shift any logic op may be widened (the SHL-requires-AND
+; restriction does not apply), so lshr+xor is folded too.
+define i64 @test5(i8* %data) {
+; CHECK-LABEL: test5:
+; CHECK: movzbl
+; CHECK-NEXT: shrq
+; CHECK-NEXT: xorq
+; CHECK-NEXT: retq
+entry:
+ %bf.load = load i8, i8* %data, align 4
+ %bf.clear = lshr i8 %bf.load, 2
+ %0 = xor i8 %bf.clear, 60
+ %1 = zext i8 %0 to i64
+ ret i64 %1
+}
+
+; lshr+or is likewise widened to 64-bit operations on the zero-extended load.
+define i64 @test6(i8* %data) {
+; CHECK-LABEL: test6:
+; CHECK: movzbl
+; CHECK-NEXT: shrq
+; CHECK-NEXT: orq
+; CHECK-NEXT: retq
+entry:
+ %bf.load = load i8, i8* %data, align 4
+ %bf.clear = lshr i8 %bf.load, 2
+ %0 = or i8 %bf.clear, 60
+ %1 = zext i8 %0 to i64
+ ret i64 %1
+}
+
+; Don't do the folding if the other operand isn't a constant.
+; The shr/or stay in i8 and the zext is a trailing movzbl.
+define i64 @test7(i8* %data, i8 %logop) {
+; CHECK-LABEL: test7:
+; CHECK: movb
+; CHECK-NEXT: shrb
+; CHECK-NEXT: orb
+; CHECK-NEXT: movzbl
+; CHECK-NEXT: retq
+entry:
+ %bf.load = load i8, i8* %data, align 4
+ %bf.clear = lshr i8 %bf.load, 2
+ %0 = or i8 %bf.clear, %logop
+ %1 = zext i8 %0 to i64
+ ret i64 %1
+}
+
+; Load is folded with sext.
+; The load is already consumed as a sign-extending load (movsbl), so the
+; combine must not turn it into a zextload; the zext is applied afterwards.
+define i64 @test8(i8* %data) {
+; CHECK-LABEL: test8:
+; CHECK: movsbl
+; CHECK-NEXT: movzwl
+; CHECK-NEXT: shrl
+; CHECK-NEXT: orl
+entry:
+ %bf.load = load i8, i8* %data, align 4
+ %ext = sext i8 %bf.load to i16
+ %bf.clear = lshr i16 %ext, 2
+ %0 = or i16 %bf.clear, 60
+ %1 = zext i16 %0 to i64
+ ret i64 %1
+}
+
More information about the llvm-commits
mailing list