[llvm] r186491 - Make x86 fast-isel correctly choose between aligned and unaligned operations for vector stores. Fixes PR16640.

Eric Christopher echristo at gmail.com
Tue Jul 16 23:54:35 PDT 2013


Thanks for the fix Craig!

-eric

On Tue, Jul 16, 2013 at 10:57 PM, Craig Topper <craig.topper at gmail.com> wrote:
> Author: ctopper
> Date: Wed Jul 17 00:57:45 2013
> New Revision: 186491
>
> URL: http://llvm.org/viewvc/llvm-project?rev=186491&view=rev
> Log:
> Make x86 fast-isel correctly choose between aligned and unaligned operations for vector stores. Fixes PR16640.
>
> Added:
>     llvm/trunk/test/CodeGen/X86/fast-isel-store.ll
>       - copied, changed from r186485, llvm/trunk/test/CodeGen/X86/fast-isel-unaligned-store.ll
> Removed:
>     llvm/trunk/test/CodeGen/X86/fast-isel-unaligned-store.ll
> Modified:
>     llvm/trunk/lib/Target/X86/X86FastISel.cpp
>     llvm/trunk/test/CodeGen/X86/2011-10-18-FastISel-VectorParams.ll
>
> Modified: llvm/trunk/lib/Target/X86/X86FastISel.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86FastISel.cpp?rev=186491&r1=186490&r2=186491&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86FastISel.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86FastISel.cpp Wed Jul 17 00:57:45 2013
> @@ -79,8 +79,10 @@ private:
>
>    bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, unsigned &RR);
>
> -  bool X86FastEmitStore(EVT VT, const Value *Val, const X86AddressMode &AM);
> -  bool X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM);
> +  bool X86FastEmitStore(EVT VT, const Value *Val, const X86AddressMode &AM,
> +                        bool Aligned = false);
> +  bool X86FastEmitStore(EVT VT, unsigned ValReg, const X86AddressMode &AM,
> +                        bool Aligned = false);
>
>    bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
>                           unsigned &ResultReg);
> @@ -233,7 +235,8 @@ bool X86FastISel::X86FastEmitLoad(EVT VT
>  /// and a displacement offset, or a GlobalAddress,
>  /// i.e. V. Return true if it is possible.
>  bool
> -X86FastISel::X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM) {
> +X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg,
> +                              const X86AddressMode &AM, bool Aligned) {
>    // Get opcode and regclass of the output for the given store instruction.
>    unsigned Opc = 0;
>    switch (VT.getSimpleVT().SimpleTy) {
> @@ -243,8 +246,8 @@ X86FastISel::X86FastEmitStore(EVT VT, un
>      // Mask out all but lowest bit.
>      unsigned AndResult = createResultReg(&X86::GR8RegClass);
>      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
> -            TII.get(X86::AND8ri), AndResult).addReg(Val).addImm(1);
> -    Val = AndResult;
> +            TII.get(X86::AND8ri), AndResult).addReg(ValReg).addImm(1);
> +    ValReg = AndResult;
>    }
>    // FALLTHROUGH, handling i1 as i8.
>    case MVT::i8:  Opc = X86::MOV8mr;  break;
> @@ -260,26 +263,35 @@ X86FastISel::X86FastEmitStore(EVT VT, un
>            (Subtarget->hasAVX() ? X86::VMOVSDmr : X86::MOVSDmr) : X86::ST_Fp64m;
>      break;
>    case MVT::v4f32:
> -    Opc = X86::MOVAPSmr;
> +    if (Aligned)
> +      Opc = X86::MOVAPSmr;
> +    else
> +      Opc = X86::MOVUPSmr;
>      break;
>    case MVT::v2f64:
> -    Opc = X86::MOVAPDmr;
> +    if (Aligned)
> +      Opc = X86::MOVAPSmr;
> +    else
> +      Opc = X86::MOVUPSmr;
>      break;
>    case MVT::v4i32:
>    case MVT::v2i64:
>    case MVT::v8i16:
>    case MVT::v16i8:
> -    Opc = X86::MOVDQAmr;
> +    if (Aligned)
> +      Opc = X86::MOVDQAmr;
> +    else
> +      Opc = X86::MOVDQUmr;
>      break;
>    }
>
>    addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
> -                         DL, TII.get(Opc)), AM).addReg(Val);
> +                         DL, TII.get(Opc)), AM).addReg(ValReg);
>    return true;
>  }
>
>  bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
> -                                   const X86AddressMode &AM) {
> +                                   const X86AddressMode &AM, bool Aligned) {
>    // Handle 'null' like i32/i64 0.
>    if (isa<ConstantPointerNull>(Val))
>      Val = Constant::getNullValue(TD.getIntPtrType(Val->getContext()));
> @@ -314,7 +326,7 @@ bool X86FastISel::X86FastEmitStore(EVT V
>    if (ValReg == 0)
>      return false;
>
> -  return X86FastEmitStore(VT, ValReg, AM);
> +  return X86FastEmitStore(VT, ValReg, AM, Aligned);
>  }
>
>  /// X86FastEmitExtend - Emit a machine instruction to extend a value Src of
> @@ -688,6 +700,10 @@ bool X86FastISel::X86SelectStore(const I
>    if (S->isAtomic())
>      return false;
>
> +  unsigned SABIAlignment =
> +    TD.getABITypeAlignment(S->getValueOperand()->getType());
> +  bool Aligned = S->getAlignment() == 0 || S->getAlignment() >= SABIAlignment;
> +
>    MVT VT;
>    if (!isTypeLegal(I->getOperand(0)->getType(), VT, /*AllowI1=*/true))
>      return false;
> @@ -696,7 +712,7 @@ bool X86FastISel::X86SelectStore(const I
>    if (!X86SelectAddress(I->getOperand(1), AM))
>      return false;
>
> -  return X86FastEmitStore(VT, I->getOperand(0), AM);
> +  return X86FastEmitStore(VT, I->getOperand(0), AM, Aligned);
>  }
>
>  /// X86SelectRet - Select and emit code to implement ret instructions.
>
> Modified: llvm/trunk/test/CodeGen/X86/2011-10-18-FastISel-VectorParams.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2011-10-18-FastISel-VectorParams.ll?rev=186491&r1=186490&r2=186491&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/2011-10-18-FastISel-VectorParams.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/2011-10-18-FastISel-VectorParams.ll Wed Jul 17 00:57:45 2013
> @@ -20,7 +20,7 @@ entry:
>    %2 = load <4 x float>* %p3, align 16
>    %3 = load <4 x float>* %p4, align 16
>    %4 = load <4 x float>* %p5, align 16
> -; CHECK:      movaps {{%xmm[0-7]}}, (%esp)
> +; CHECK:      movups {{%xmm[0-7]}}, (%esp)
>  ; CHECK-NEXT: calll _dovectortest
>    call void @dovectortest(<4 x float> %0, <4 x float> %1, <4 x float> %2, <4 x float> %3, <4 x float> %4)
>    ret void
>
> Copied: llvm/trunk/test/CodeGen/X86/fast-isel-store.ll (from r186485, llvm/trunk/test/CodeGen/X86/fast-isel-unaligned-store.ll)
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fast-isel-store.ll?p2=llvm/trunk/test/CodeGen/X86/fast-isel-store.ll&p1=llvm/trunk/test/CodeGen/X86/fast-isel-unaligned-store.ll&r1=186485&r2=186491&rev=186491&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/fast-isel-unaligned-store.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/fast-isel-store.ll Wed Jul 17 00:57:45 2013
> @@ -1,5 +1,5 @@
> -; RUN: llc -mtriple=x86_64-none-linux -fast-isel -fast-isel-abort < %s | FileCheck %s
> -; RUN: llc -mtriple=i686-none-linux -fast-isel -fast-isel-abort < %s | FileCheck %s
> +; RUN: llc -mtriple=x86_64-none-linux -fast-isel -fast-isel-abort -mattr=+sse2 < %s | FileCheck %s
> +; RUN: llc -mtriple=i686-none-linux -fast-isel -fast-isel-abort -mattr=+sse2 < %s | FileCheck %s
>
>  define i32 @test_store_32(i32* nocapture %addr, i32 %value) {
>  entry:
> @@ -16,3 +16,33 @@ entry:
>  }
>
>  ; CHECK: ret
> +
> +define <4 x i32> @test_store_4xi32(<4 x i32>* nocapture %addr, <4 x i32> %value, <4 x i32> %value2) {
> +; CHECK: movdqu
> +; CHECK: ret
> +  %foo = add <4 x i32> %value, %value2 ; to force integer type on store
> +  store <4 x i32> %foo, <4 x i32>* %addr, align 1
> +  ret <4 x i32> %foo
> +}
> +
> +define <4 x i32> @test_store_4xi32_aligned(<4 x i32>* nocapture %addr, <4 x i32> %value, <4 x i32> %value2) {
> +; CHECK: movdqa
> +; CHECK: ret
> +  %foo = add <4 x i32> %value, %value2 ; to force integer type on store
> +  store <4 x i32> %foo, <4 x i32>* %addr, align 16
> +  ret <4 x i32> %foo
> +}
> +
> +define <4 x float> @test_store_4xf32(<4 x float>* nocapture %addr, <4 x float> %value) {
> +; CHECK: movups
> +; CHECK: ret
> +  store <4 x float> %value, <4 x float>* %addr, align 1
> +  ret <4 x float> %value
> +}
> +
> +define <4 x float> @test_store_4xf32_aligned(<4 x float>* nocapture %addr, <4 x float> %value) {
> +; CHECK: movaps
> +; CHECK: ret
> +  store <4 x float> %value, <4 x float>* %addr, align 16
> +  ret <4 x float> %value
> +}
>
> Removed: llvm/trunk/test/CodeGen/X86/fast-isel-unaligned-store.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fast-isel-unaligned-store.ll?rev=186490&view=auto
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/fast-isel-unaligned-store.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/fast-isel-unaligned-store.ll (removed)
> @@ -1,18 +0,0 @@
> -; RUN: llc -mtriple=x86_64-none-linux -fast-isel -fast-isel-abort < %s | FileCheck %s
> -; RUN: llc -mtriple=i686-none-linux -fast-isel -fast-isel-abort < %s | FileCheck %s
> -
> -define i32 @test_store_32(i32* nocapture %addr, i32 %value) {
> -entry:
> -  store i32 %value, i32* %addr, align 1
> -  ret i32 %value
> -}
> -
> -; CHECK: ret
> -
> -define i16 @test_store_16(i16* nocapture %addr, i16 %value) {
> -entry:
> -  store i16 %value, i16* %addr, align 1
> -  ret i16 %value
> -}
> -
> -; CHECK: ret
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits



More information about the llvm-commits mailing list