[llvm] r252606 - Reapply "[ARM] Combine CMOV into BFI where possible"
Oliver Stannard via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 13 05:36:13 PST 2015
Hi James,
I've found a codegen fault with csmith that seems to be caused by this commit. Here's the reproducer for it:
#include <stdio.h>
unsigned a = 1;
struct {
unsigned f0 : 1;
unsigned f1 : 16;
} d = {0, 0}, f = {0, 0};
__attribute((noinline))
void foo() {
f.f0 = d.f0 < (a | 1);
}
int main() {
foo();
printf("checksum = %X\n", f.f0);
}
This should print "checksum = 1" because the comparison in foo will be true (d.f0 is 0, (a | 1) is 1). However, with this patch applied, it prints "checksum = 0".
The foo function is being miscompiled, here's the IR for that function:
@a = global i32 1, align 4
@d = global { i8, i8, i8, i8 } { i8 0, i8 0, i8 0, i8 undef }, align 4
@f = global { i8, i8, i8, i8 } { i8 0, i8 0, i8 0, i8 undef }, align 4
define void @foo() noinline nounwind {
entry:
%bf.load = load i32, i32* bitcast ({ i8, i8, i8, i8 }* @d to i32*), align 4
%bf.clear = and i32 %bf.load, 1
%0 = load i32, i32* @a, align 4, !tbaa !3
%or = or i32 %0, 1
%cmp = icmp ne i32 %bf.clear, %or
%conv = zext i1 %cmp to i32
%bf.load1 = load i32, i32* bitcast ({ i8, i8, i8, i8 }* @f to i32*), align 4
%bf.clear2 = and i32 %bf.load1, -2
%bf.set = or i32 %conv, %bf.clear2
store i32 %bf.set, i32* bitcast ({ i8, i8, i8, i8 }* @f to i32*), align 4
ret void
}
Here's the assembly I get for this function (with this patch applied):
foo:
movw r0, :lower16:f
movw r2, :lower16:d
movt r0, :upper16:f
movt r2, :upper16:d
ldr r1, [r0]
ldr r2, [r2]
bfc r1, #0, #1
bfi r1, r2, #0, #1
str r1, [r0]
bx lr
Oliver
> -----Original Message-----
> From: llvm-commits [mailto:llvm-commits-bounces at lists.llvm.org] On Behalf
> Of James Molloy via llvm-commits
> Sent: 10 November 2015 14:22
> To: llvm-commits at lists.llvm.org
> Subject: [llvm] r252606 - Reapply "[ARM] Combine CMOV into BFI where
> possible"
>
> Author: jamesm
> Date: Tue Nov 10 08:22:05 2015
> New Revision: 252606
>
> URL: http://llvm.org/viewvc/llvm-project?rev=252606&view=rev
> Log:
> Reapply "[ARM] Combine CMOV into BFI where possible"
>
> Added fixes for stage2 failures: CMOV is not commutable; commuting the
> operands results in the condition being flipped! d'oh!
>
> Original commit message:
>
> If we have a CMOV, OR and AND combination such as:
> if (x & CN)
> y |= CM;
>
> And:
> * CN is a single bit;
> * All bits covered by CM are known zero in y;
>
> Then we can convert this to a sequence of BFI instructions. This will
> always be a win if CM is a single bit, will always be no worse than the
> TST & OR sequence if CM is two bits, and for thumb will be no worse if CM
> is three bits (due to the extra IT instruction).
>
> Modified:
> llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
> llvm/trunk/lib/Target/ARM/ARMISelLowering.h
> llvm/trunk/test/CodeGen/ARM/bfi.ll
>
> Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
> URL: http://llvm.org/viewvc/llvm-
> project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=252606&r1=252605
> &r2=252606&view=diff
> ==========================================================================
> ====
> --- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Tue Nov 10 08:22:05 2015
> @@ -10241,6 +10241,111 @@ static SDValue PerformExtendCombine(SDNo
> return SDValue();
> }
>
> +static void computeKnownBits(SelectionDAG &DAG, SDValue Op, APInt
> &KnownZero,
> + APInt &KnownOne) {
> + if (Op.getOpcode() == ARMISD::BFI) {
> + // Conservatively, we can recurse down the first operand
> + // and just mask out all affected bits.
> + computeKnownBits(DAG, Op.getOperand(0), KnownZero, KnownOne);
> +
> + // The operand to BFI is already a mask suitable for removing the
> bits it
> + // sets.
> + ConstantSDNode *CI = cast<ConstantSDNode>(Op.getOperand(2));
> + APInt Mask = CI->getAPIntValue();
> + KnownZero &= Mask;
> + KnownOne &= Mask;
> + return;
> + }
> + if (Op.getOpcode() == ARMISD::CMOV) {
> + APInt KZ2(KnownZero.getBitWidth(), 0);
> + APInt KO2(KnownOne.getBitWidth(), 0);
> + computeKnownBits(DAG, Op.getOperand(1), KnownZero, KnownOne);
> + computeKnownBits(DAG, Op.getOperand(2), KZ2, KO2);
> +
> + KnownZero &= KZ2;
> + KnownOne &= KO2;
> + return;
> + }
> + return DAG.computeKnownBits(Op, KnownZero, KnownOne);
> +}
> +
> +SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV,
> SelectionDAG &DAG) const {
> + // If we have a CMOV, OR and AND combination such as:
> + // if (x & CN)
> + // y |= CM;
> + //
> + // And:
> + // * CN is a single bit;
> + // * All bits covered by CM are known zero in y
> + //
> + // Then we can convert this into a sequence of BFI instructions. This
> will
> + // always be a win if CM is a single bit, will always be no worse than
> the
> + // TST&OR sequence if CM is two bits, and for thumb will be no worse if
> CM is
> + // three bits (due to the extra IT instruction).
> +
> + SDValue Op0 = CMOV->getOperand(0);
> + SDValue Op1 = CMOV->getOperand(1);
> + SDValue CmpZ = CMOV->getOperand(4);
> +
> + assert(CmpZ->getOpcode() == ARMISD::CMPZ);
> + SDValue And = CmpZ->getOperand(0);
> + if (And->getOpcode() != ISD::AND)
> + return SDValue();
> + ConstantSDNode *AndC = dyn_cast<ConstantSDNode>(And->getOperand(1));
> + if (!AndC || !AndC->getAPIntValue().isPowerOf2())
> + return SDValue();
> + SDValue X = And->getOperand(0);
> +
> + if (Op1->getOpcode() != ISD::OR)
> + return SDValue();
> +
> + ConstantSDNode *OrC = dyn_cast<ConstantSDNode>(Op1->getOperand(1));
> + if (!OrC)
> + return SDValue();
> + SDValue Y = Op1->getOperand(0);
> +
> + if (Op0 != Y)
> + return SDValue();
> +
> + // Now, is it profitable to continue?
> + APInt OrCI = OrC->getAPIntValue();
> + unsigned Heuristic = Subtarget->isThumb() ? 3 : 2;
> + if (OrCI.countPopulation() > Heuristic)
> + return SDValue();
> +
> + // Lastly, can we determine that the bits defined by OrCI
> + // are zero in Y?
> + APInt KnownZero, KnownOne;
> + computeKnownBits(DAG, Y, KnownZero, KnownOne);
> + if ((OrCI & KnownZero) != OrCI)
> + return SDValue();
> +
> + // OK, we can do the combine.
> + SDValue V = Y;
> + SDLoc dl(X);
> + EVT VT = X.getValueType();
> + unsigned BitInX = AndC->getAPIntValue().logBase2();
> +
> + if (BitInX != 0) {
> + // We must shift X first.
> + X = DAG.getNode(ISD::SRL, dl, VT, X,
> + DAG.getConstant(BitInX, dl, VT));
> + }
> +
> + for (unsigned BitInY = 0, NumActiveBits = OrCI.getActiveBits();
> + BitInY < NumActiveBits; ++BitInY) {
> + if (OrCI[BitInY] == 0)
> + continue;
> + APInt Mask(VT.getSizeInBits(), 0);
> + Mask.setBit(BitInY);
> + V = DAG.getNode(ARMISD::BFI, dl, VT, V, X,
> + // Confusingly, the operand is an *inverted* mask.
> + DAG.getConstant(~Mask, dl, VT));
> + }
> +
> + return V;
> +}
> +
> /// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
> SDValue
> ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const
> {
> @@ -10259,6 +10364,13 @@ ARMTargetLowering::PerformCMOVCombine(SD
> ARMCC::CondCodes CC =
> (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
>
> + // BFI is only available on V6T2+.
> + if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops()) {
> + SDValue R = PerformCMOVToBFICombine(N, DAG);
> + if (R)
> + return R;
> + }
> +
> // Simplify
> // mov r1, r0
> // cmp r1, x
>
> Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.h
> URL: http://llvm.org/viewvc/llvm-
> project/llvm/trunk/lib/Target/ARM/ARMISelLowering.h?rev=252606&r1=252605&r
> 2=252606&view=diff
> ==========================================================================
> ====
> --- llvm/trunk/lib/Target/ARM/ARMISelLowering.h (original)
> +++ llvm/trunk/lib/Target/ARM/ARMISelLowering.h Tue Nov 10 08:22:05 2015
> @@ -260,6 +260,7 @@ namespace llvm {
> SDNode *Node) const override;
>
> SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const;
> + SDValue PerformCMOVToBFICombine(SDNode *N, SelectionDAG &DAG) const;
> SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
> override;
>
> bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const
> override;
>
> Modified: llvm/trunk/test/CodeGen/ARM/bfi.ll
> URL: http://llvm.org/viewvc/llvm-
> project/llvm/trunk/test/CodeGen/ARM/bfi.ll?rev=252606&r1=252605&r2=252606&
> view=diff
> ==========================================================================
> ====
> --- llvm/trunk/test/CodeGen/ARM/bfi.ll (original)
> +++ llvm/trunk/test/CodeGen/ARM/bfi.ll Tue Nov 10 08:22:05 2015
> @@ -74,3 +74,37 @@ entry:
> %or = or i32 %shl, %and
> ret i32 %or
> }
> +
> +define i32 @f7(i32 %x, i32 %y) {
> +; CHECK-LABEL: f7:
> +; CHECK: bfi r1, r0, #4, #1
> + %y2 = and i32 %y, 4294967040 ; 0xFFFFFF00
> + %and = and i32 %x, 4
> + %or = or i32 %y2, 16
> + %cmp = icmp ne i32 %and, 0
> + %sel = select i1 %cmp, i32 %or, i32 %y2
> + ret i32 %sel
> +}
> +
> +define i32 @f8(i32 %x, i32 %y) {
> +; CHECK-LABEL: f8:
> +; CHECK: bfi r1, r0, #4, #1
> +; CHECK: bfi r1, r0, #5, #1
> + %y2 = and i32 %y, 4294967040 ; 0xFFFFFF00
> + %and = and i32 %x, 4
> + %or = or i32 %y2, 48
> + %cmp = icmp ne i32 %and, 0
> + %sel = select i1 %cmp, i32 %or, i32 %y2
> + ret i32 %sel
> +}
> +
> +define i32 @f9(i32 %x, i32 %y) {
> +; CHECK-LABEL: f9:
> +; CHECK-NOT: bfi
> + %y2 = and i32 %y, 4294967040 ; 0xFFFFFF00
> + %and = and i32 %x, 4
> + %or = or i32 %y2, 48
> + %cmp = icmp ne i32 %and, 0
> + %sel = select i1 %cmp, i32 %y2, i32 %or
> + ret i32 %sel
> +}
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
________________________________
-- IMPORTANT NOTICE: The contents of this email and any attachments are confidential and may also be privileged. If you are not the intended recipient, please notify the sender immediately and do not disclose the contents to any other person, use it for any purpose, or store or copy the information in any medium. Thank you.
More information about the llvm-commits
mailing list