[llvm] r252606 - Reapply "[ARM] Combine CMOV into BFI where possible"

James Molloy via llvm-commits llvm-commits at lists.llvm.org
Mon Nov 16 02:52:42 PST 2015


Hi Oliver,

Thanks for catching this. It was a nasty bug. Fixed in r253195.

Cheers,

James

On Fri, 13 Nov 2015 at 13:36 Oliver Stannard via llvm-commits <llvm-commits at lists.llvm.org> wrote:

> Hi James,
>
> I've found a codegen fault with csmith that seems to be caused by this
> commit. Here's the reproducer for it:
>   #include <stdio.h>
>
>   unsigned a = 1;
>   struct {
>     unsigned f0 : 1;
>     unsigned f1 : 16;
>   } d = {0, 0}, f = {0, 0};
>
>   __attribute((noinline))
>   void foo() {
>     f.f0 = d.f0 < (a | 1);
>   }
>
>   int main() {
>     foo();
>     printf("checksum = %X\n", f.f0);
>   }
>
> This should print "checksum = 1" because the comparison in foo will be
> true (d.f0 is 0, (a | 1) is 1). However, with this patch applied, it prints
> "checksum = 0".
>
> The foo function is being miscompiled; here's the IR for that function:
>   @a = global i32 1, align 4
>   @d = global { i8, i8, i8, i8 } { i8 0, i8 0, i8 0, i8 undef }, align 4
>   @f = global { i8, i8, i8, i8 } { i8 0, i8 0, i8 0, i8 undef }, align 4
>   define void @foo() noinline nounwind {
>   entry:
>     %bf.load = load i32, i32* bitcast ({ i8, i8, i8, i8 }* @d to i32*), align 4
>     %bf.clear = and i32 %bf.load, 1
>     %0 = load i32, i32* @a, align 4, !tbaa !3
>     %or = or i32 %0, 1
>     %cmp = icmp ne i32 %bf.clear, %or
>     %conv = zext i1 %cmp to i32
>     %bf.load1 = load i32, i32* bitcast ({ i8, i8, i8, i8 }* @f to i32*), align 4
>     %bf.clear2 = and i32 %bf.load1, -2
>     %bf.set = or i32 %conv, %bf.clear2
>     store i32 %bf.set, i32* bitcast ({ i8, i8, i8, i8 }* @f to i32*), align 4
>     ret void
>   }
>
> Here's the assembly I get for this function (with this patch applied):
>   foo:
>         movw    r0, :lower16:f
>         movw    r2, :lower16:d
>         movt    r0, :upper16:f
>         movt    r2, :upper16:d
>         ldr     r1, [r0]
>         ldr     r2, [r2]
>         bfc     r1, #0, #1
>         bfi     r1, r2, #0, #1
>         str     r1, [r0]
>         bx      lr
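
For anyone reading along: in C terms, the difference between what this assembly
computes and what the IR asks for is roughly the following (a sketch only; the
word-level helpers and their names are mine):

  /* Bit 0 of @f's word, given the words loaded from @d, @a and @f. */
  unsigned generated(unsigned d_word, unsigned a_val, unsigned f_word) {
    /* bfc f,#0,#1 then bfi f,d,#0,#1: bit 0 of d is copied straight in.
       Note that a_val is never used - that is the bug.               */
    return (f_word & ~1u) | (d_word & 1u);
  }
  unsigned expected(unsigned d_word, unsigned a_val, unsigned f_word) {
    /* The IR wants zext(icmp ne (d & 1), (a | 1)) inserted into bit 0. */
    return (f_word & ~1u) | (unsigned)((d_word & 1u) != (a_val | 1u));
  }

With d = {0, 0}, a = 1 and f = {0, 0}, expected() gives 1 but generated() gives
0, matching the wrong checksum.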
>
> Oliver
>
> > -----Original Message-----
> > From: llvm-commits [mailto:llvm-commits-bounces at lists.llvm.org] On Behalf Of James Molloy via llvm-commits
> > Sent: 10 November 2015 14:22
> > To: llvm-commits at lists.llvm.org
> > Subject: [llvm] r252606 - Reapply "[ARM] Combine CMOV into BFI where possible"
> >
> > Author: jamesm
> > Date: Tue Nov 10 08:22:05 2015
> > New Revision: 252606
> >
> > URL: http://llvm.org/viewvc/llvm-project?rev=252606&view=rev
> > Log:
> > Reapply "[ARM] Combine CMOV into BFI where possible"
> >
> > Added fixes for stage2 failures: CMOV is not commutable; commuting the
> > operands results in the condition being flipped! d'oh!
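
For the archives, the identity involved there, as a minimal C sketch rather
than the original stage2 failure:

  /* Swapping the two value operands of a select/CMOV is only legal if the
     condition is inverted at the same time.                               */
  int sel(int c, int t, int f)         { return c  ? t : f; }
  int sel_swapped(int c, int t, int f) { return !c ? f : t; }  /* same as sel */
  int sel_wrong(int c, int t, int f)   { return c  ? f : t; }  /* condition not flipped */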
> >
> > Original commit message:
> >
> > If we have a CMOV, OR and AND combination such as:
> >   if (x & CN)
> >       y |= CM;
> >
> > And:
> >   * CN is a single bit;
> >   * All bits covered by CM are known zero in y;
> >
> > Then we can convert this to a sequence of BFI instructions. This will
> > always be a win if CM is a single bit, will always be no worse than the
> > TST & OR sequence if CM is two bits, and for thumb will be no worse if CM
> > is three bits (due to the extra IT instruction).
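
As a concrete instance of the pattern (this mirrors the f7 test added below;
a sketch only, the function name is made up):

  unsigned f7_like(unsigned x, unsigned y) {
    y &= 0xFFFFFF00u;   /* all bits covered by CM are now known zero in y  */
    if (x & 0x4u)       /* CN = 0x4, a single bit                          */
      y |= 0x10u;       /* CM = 0x10; previously a tst + orr behind a CMOV */
    return y;           /* now: lsr x by 2, then bfi y, x, #4, #1          */
  }

With CM = 0x30 (two bits, as in the f8 test) the same shape becomes two BFIs,
which is the break-even point the heuristic describes; f9 swaps the select
operands, so it must not (and per its CHECK-NOT does not) get combined.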
> >
> > Modified:
> >     llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
> >     llvm/trunk/lib/Target/ARM/ARMISelLowering.h
> >     llvm/trunk/test/CodeGen/ARM/bfi.ll
> >
> > Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
> > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=252606&r1=252605&r2=252606&view=diff
> > ==============================================================================
> > --- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original)
> > +++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Tue Nov 10 08:22:05 2015
> > @@ -10241,6 +10241,111 @@ static SDValue PerformExtendCombine(SDNo
> >    return SDValue();
> >  }
> >
> > +static void computeKnownBits(SelectionDAG &DAG, SDValue Op, APInt &KnownZero,
> > +                             APInt &KnownOne) {
> > +  if (Op.getOpcode() == ARMISD::BFI) {
> > +    // Conservatively, we can recurse down the first operand
> > +    // and just mask out all affected bits.
> > +    computeKnownBits(DAG, Op.getOperand(0), KnownZero, KnownOne);
> > +
> > +    // The operand to BFI is already a mask suitable for removing the bits it
> > +    // sets.
> > +    ConstantSDNode *CI = cast<ConstantSDNode>(Op.getOperand(2));
> > +    APInt Mask = CI->getAPIntValue();
> > +    KnownZero &= Mask;
> > +    KnownOne &= Mask;
> > +    return;
> > +  }
> > +  if (Op.getOpcode() == ARMISD::CMOV) {
> > +    APInt KZ2(KnownZero.getBitWidth(), 0);
> > +    APInt KO2(KnownOne.getBitWidth(), 0);
> > +    computeKnownBits(DAG, Op.getOperand(1), KnownZero, KnownOne);
> > +    computeKnownBits(DAG, Op.getOperand(2), KZ2, KO2);
> > +
> > +    KnownZero &= KZ2;
> > +    KnownOne &= KO2;
> > +    return;
> > +  }
> > +  return DAG.computeKnownBits(Op, KnownZero, KnownOne);
> > +}
> > +
> > +SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &DAG) const {
> > +  // If we have a CMOV, OR and AND combination such as:
> > +  //   if (x & CN)
> > +  //     y |= CM;
> > +  //
> > +  // And:
> > +  //   * CN is a single bit;
> > +  //   * All bits covered by CM are known zero in y
> > +  //
> > +  // Then we can convert this into a sequence of BFI instructions. This will
> > +  // always be a win if CM is a single bit, will always be no worse than the
> > +  // TST&OR sequence if CM is two bits, and for thumb will be no worse if CM is
> > +  // three bits (due to the extra IT instruction).
> > +
> > +  SDValue Op0 = CMOV->getOperand(0);
> > +  SDValue Op1 = CMOV->getOperand(1);
> > +  SDValue CmpZ = CMOV->getOperand(4);
> > +
> > +  assert(CmpZ->getOpcode() == ARMISD::CMPZ);
> > +  SDValue And = CmpZ->getOperand(0);
> > +  if (And->getOpcode() != ISD::AND)
> > +    return SDValue();
> > +  ConstantSDNode *AndC = dyn_cast<ConstantSDNode>(And->getOperand(1));
> > +  if (!AndC || !AndC->getAPIntValue().isPowerOf2())
> > +    return SDValue();
> > +  SDValue X = And->getOperand(0);
> > +
> > +  if (Op1->getOpcode() != ISD::OR)
> > +    return SDValue();
> > +
> > +  ConstantSDNode *OrC = dyn_cast<ConstantSDNode>(Op1->getOperand(1));
> > +  if (!OrC)
> > +    return SDValue();
> > +  SDValue Y = Op1->getOperand(0);
> > +
> > +  if (Op0 != Y)
> > +    return SDValue();
> > +
> > +  // Now, is it profitable to continue?
> > +  APInt OrCI = OrC->getAPIntValue();
> > +  unsigned Heuristic = Subtarget->isThumb() ? 3 : 2;
> > +  if (OrCI.countPopulation() > Heuristic)
> > +    return SDValue();
> > +
> > +  // Lastly, can we determine that the bits defined by OrCI
> > +  // are zero in Y?
> > +  APInt KnownZero, KnownOne;
> > +  computeKnownBits(DAG, Y, KnownZero, KnownOne);
> > +  if ((OrCI & KnownZero) != OrCI)
> > +    return SDValue();
> > +
> > +  // OK, we can do the combine.
> > +  SDValue V = Y;
> > +  SDLoc dl(X);
> > +  EVT VT = X.getValueType();
> > +  unsigned BitInX = AndC->getAPIntValue().logBase2();
> > +
> > +  if (BitInX != 0) {
> > +    // We must shift X first.
> > +    X = DAG.getNode(ISD::SRL, dl, VT, X,
> > +                    DAG.getConstant(BitInX, dl, VT));
> > +  }
> > +
> > +  for (unsigned BitInY = 0, NumActiveBits = OrCI.getActiveBits();
> > +       BitInY < NumActiveBits; ++BitInY) {
> > +    if (OrCI[BitInY] == 0)
> > +      continue;
> > +    APInt Mask(VT.getSizeInBits(), 0);
> > +    Mask.setBit(BitInY);
> > +    V = DAG.getNode(ARMISD::BFI, dl, VT, V, X,
> > +                    // Confusingly, the operand is an *inverted* mask.
> > +                    DAG.getConstant(~Mask, dl, VT));
> > +  }
> > +
> > +  return V;
> > +}
> > +
> >  /// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
> >  SDValue
> >  ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
> > @@ -10259,6 +10364,13 @@ ARMTargetLowering::PerformCMOVCombine(SD
> >    ARMCC::CondCodes CC =
> >      (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
> >
> > +  // BFI is only available on V6T2+.
> > +  if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops()) {
> > +    SDValue R = PerformCMOVToBFICombine(N, DAG);
> > +    if (R)
> > +      return R;
> > +  }
> > +
> >    // Simplify
> >    //   mov     r1, r0
> >    //   cmp     r1, x
> >
> > Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.h
> > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.h?rev=252606&r1=252605&r2=252606&view=diff
> > ==============================================================================
> > --- llvm/trunk/lib/Target/ARM/ARMISelLowering.h (original)
> > +++ llvm/trunk/lib/Target/ARM/ARMISelLowering.h Tue Nov 10 08:22:05 2015
> > @@ -260,6 +260,7 @@ namespace llvm {
> >                                         SDNode *Node) const override;
> >
> >      SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const;
> > +    SDValue PerformCMOVToBFICombine(SDNode *N, SelectionDAG &DAG) const;
> >      SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
> >
> >      bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const override;
> >
> > Modified: llvm/trunk/test/CodeGen/ARM/bfi.ll
> > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/bfi.ll?rev=252606&r1=252605&r2=252606&view=diff
> > ==============================================================================
> > --- llvm/trunk/test/CodeGen/ARM/bfi.ll (original)
> > +++ llvm/trunk/test/CodeGen/ARM/bfi.ll Tue Nov 10 08:22:05 2015
> > @@ -74,3 +74,37 @@ entry:
> >    %or = or i32 %shl, %and
> >    ret i32 %or
> >  }
> > +
> > +define i32 @f7(i32 %x, i32 %y) {
> > +; CHECK-LABEL: f7:
> > +; CHECK: bfi r1, r0, #4, #1
> > +  %y2 = and i32 %y, 4294967040 ; 0xFFFFFF00
> > +  %and = and i32 %x, 4
> > +  %or = or i32 %y2, 16
> > +  %cmp = icmp ne i32 %and, 0
> > +  %sel = select i1 %cmp, i32 %or, i32 %y2
> > +  ret i32 %sel
> > +}
> > +
> > +define i32 @f8(i32 %x, i32 %y) {
> > +; CHECK-LABEL: f8:
> > +; CHECK: bfi r1, r0, #4, #1
> > +; CHECK: bfi r1, r0, #5, #1
> > +  %y2 = and i32 %y, 4294967040 ; 0xFFFFFF00
> > +  %and = and i32 %x, 4
> > +  %or = or i32 %y2, 48
> > +  %cmp = icmp ne i32 %and, 0
> > +  %sel = select i1 %cmp, i32 %or, i32 %y2
> > +  ret i32 %sel
> > +}
> > +
> > +define i32 @f9(i32 %x, i32 %y) {
> > +; CHECK-LABEL: f9:
> > +; CHECK-NOT: bfi
> > +  %y2 = and i32 %y, 4294967040 ; 0xFFFFFF00
> > +  %and = and i32 %x, 4
> > +  %or = or i32 %y2, 48
> > +  %cmp = icmp ne i32 %and, 0
> > +  %sel = select i1 %cmp, i32 %y2, i32 %or
> > +  ret i32 %sel
> > +}
> >
> >