[llvm] r177160 - Enable unaligned memory access on PPC for scalar types
Bill Schmidt
wschmidt at linux.vnet.ibm.com
Fri Mar 15 13:17:48 PDT 2013
Hal, this looks good. Thanks for handling it! I would prefer to see
the structsinmem.ll and structsinregs.ll tests updated to check for the
better code generation rather than being run with -disable-ppc-unaligned.
Whether or not that's changed, the FIXMEs should be removed from those
test cases, since they describe the target wrongly rejecting all unaligned
loads, which this commit fixes.
Thanks again,
Bill
On Fri, 2013-03-15 at 15:27 +0000, Hal Finkel wrote:
> Author: hfinkel
> Date: Fri Mar 15 10:27:13 2013
> New Revision: 177160
>
> URL: http://llvm.org/viewvc/llvm-project?rev=177160&view=rev
> Log:
> Enable unaligned memory access on PPC for scalar types
>
> Unaligned access is supported on PPC for non-vector types, and is generally
> more efficient than manually expanding the loads and stores.
>
> A few of the existing test cases were using expanded unaligned loads and stores
> to test other features (like load/store with update), and for these test cases,
> unaligned access remains disabled.
>
> Added:
> llvm/trunk/test/CodeGen/PowerPC/unaligned.ll
> Modified:
> llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
> llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
> llvm/trunk/test/CodeGen/PowerPC/lbzux.ll
> llvm/trunk/test/CodeGen/PowerPC/structsinmem.ll
> llvm/trunk/test/CodeGen/PowerPC/structsinregs.ll
> llvm/trunk/test/CodeGen/PowerPC/stwu8.ll
>
> Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp?rev=177160&r1=177159&r2=177160&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp Fri Mar 15 10:27:13 2013
> @@ -57,6 +57,9 @@ cl::desc("disable preincrement load/stor
> static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
> cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
>
> +static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
> +cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
> +
> static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) {
> if (TM.getSubtargetImpl()->isDarwin())
> return new TargetLoweringObjectFileMachO();
> @@ -6851,6 +6854,32 @@ EVT PPCTargetLowering::getOptimalMemOpTy
> }
> }
>
> +bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
> + bool *Fast) const {
> + if (DisablePPCUnaligned)
> + return false;
> +
> + // PowerPC supports unaligned memory access for simple non-vector types.
> + // Although accessing unaligned addresses is not as efficient as accessing
> + // aligned addresses, it is generally more efficient than manual expansion,
> + // and generally only traps for software emulation when crossing page
> + // boundaries.
> +
> + if (!VT.isSimple())
> + return false;
> +
> + if (VT.getSimpleVT().isVector())
> + return false;
> +
> + if (VT == MVT::ppcf128)
> + return false;
> +
> + if (Fast)
> + *Fast = true;
> +
> + return true;
> +}
> +
> /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
> /// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
> /// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
>
> Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h?rev=177160&r1=177159&r2=177160&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h (original)
> +++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h Fri Mar 15 10:27:13 2013
> @@ -449,6 +449,10 @@ namespace llvm {
> bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
> MachineFunction &MF) const;
>
> + /// Is unaligned memory access allowed for the given type, and is it fast
> + /// relative to software emulation.
> + virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast = 0) const;
> +
> /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
> /// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
> /// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
>
> Modified: llvm/trunk/test/CodeGen/PowerPC/lbzux.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/lbzux.ll?rev=177160&r1=177159&r2=177160&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/PowerPC/lbzux.ll (original)
> +++ llvm/trunk/test/CodeGen/PowerPC/lbzux.ll Fri Mar 15 10:27:13 2013
> @@ -1,6 +1,6 @@
> target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
> target triple = "powerpc64-unknown-linux-gnu"
> -; RUN: llc < %s | FileCheck %s
> +; RUN: llc -disable-ppc-unaligned < %s | FileCheck %s
>
> define fastcc void @allocateSpace(i1 %cond1, i1 %cond2) nounwind {
> entry:
>
> Modified: llvm/trunk/test/CodeGen/PowerPC/structsinmem.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/structsinmem.ll?rev=177160&r1=177159&r2=177160&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/PowerPC/structsinmem.ll (original)
> +++ llvm/trunk/test/CodeGen/PowerPC/structsinmem.ll Fri Mar 15 10:27:13 2013
> @@ -1,4 +1,4 @@
> -; RUN: llc -mcpu=pwr7 -O0 -disable-fp-elim < %s | FileCheck %s
> +; RUN: llc -mcpu=pwr7 -O0 -disable-fp-elim -disable-ppc-unaligned < %s | FileCheck %s
>
> ; FIXME: The code generation for packed structs is very poor because the
> ; PowerPC target wrongly rejects all unaligned loads. This test case will
>
> Modified: llvm/trunk/test/CodeGen/PowerPC/structsinregs.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/structsinregs.ll?rev=177160&r1=177159&r2=177160&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/PowerPC/structsinregs.ll (original)
> +++ llvm/trunk/test/CodeGen/PowerPC/structsinregs.ll Fri Mar 15 10:27:13 2013
> @@ -1,4 +1,4 @@
> -; RUN: llc -mcpu=pwr7 -O0 -disable-fp-elim < %s | FileCheck %s
> +; RUN: llc -mcpu=pwr7 -O0 -disable-fp-elim -disable-ppc-unaligned < %s | FileCheck %s
>
> ; FIXME: The code generation for packed structs is very poor because the
> ; PowerPC target wrongly rejects all unaligned loads. This test case will
>
> Modified: llvm/trunk/test/CodeGen/PowerPC/stwu8.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/stwu8.ll?rev=177160&r1=177159&r2=177160&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/PowerPC/stwu8.ll (original)
> +++ llvm/trunk/test/CodeGen/PowerPC/stwu8.ll Fri Mar 15 10:27:13 2013
> @@ -1,4 +1,4 @@
> -; RUN: llc < %s | FileCheck %s
> +; RUN: llc -disable-ppc-unaligned < %s | FileCheck %s
> target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
> target triple = "powerpc64-unknown-linux-gnu"
>
>
> Added: llvm/trunk/test/CodeGen/PowerPC/unaligned.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/unaligned.ll?rev=177160&view=auto
> ==============================================================================
> --- llvm/trunk/test/CodeGen/PowerPC/unaligned.ll (added)
> +++ llvm/trunk/test/CodeGen/PowerPC/unaligned.ll Fri Mar 15 10:27:13 2013
> @@ -0,0 +1,73 @@
> +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s
> +target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128-n32"
> +
> +define void @foo1(i16* %p, i16* %r) nounwind {
> +entry:
> + %v = load i16* %p, align 1
> + store i16 %v, i16* %r, align 1
> + ret void
> +
> +; CHECK: @foo1
> +; CHECK: lhz
> +; CHECK: sth
> +}
> +
> +define void @foo2(i32* %p, i32* %r) nounwind {
> +entry:
> + %v = load i32* %p, align 1
> + store i32 %v, i32* %r, align 1
> + ret void
> +
> +; CHECK: @foo2
> +; CHECK: lwz
> +; CHECK: stw
> +}
> +
> +define void @foo3(i64* %p, i64* %r) nounwind {
> +entry:
> + %v = load i64* %p, align 1
> + store i64 %v, i64* %r, align 1
> + ret void
> +
> +; CHECK: @foo3
> +; CHECK: ld
> +; CHECK: std
> +}
> +
> +define void @foo4(float* %p, float* %r) nounwind {
> +entry:
> + %v = load float* %p, align 1
> + store float %v, float* %r, align 1
> + ret void
> +
> +; CHECK: @foo4
> +; CHECK: lfs
> +; CHECK: stfs
> +}
> +
> +define void @foo5(double* %p, double* %r) nounwind {
> +entry:
> + %v = load double* %p, align 1
> + store double %v, double* %r, align 1
> + ret void
> +
> +; CHECK: @foo5
> +; CHECK: lfd
> +; CHECK: stfd
> +}
> +
> +define void @foo6(<4 x float>* %p, <4 x float>* %r) nounwind {
> +entry:
> + %v = load <4 x float>* %p, align 1
> + store <4 x float> %v, <4 x float>* %r, align 1
> + ret void
> +
> +; These loads and stores are legalized into aligned loads and stores
> +; using aligned stack slots.
> +; CHECK: @foo6
> +; CHECK: ld
> +; CHECK: ld
> +; CHECK: std
> +; CHECK: std
> +}
> +
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>
More information about the llvm-commits mailing list