[PATCH] R600/SI: 64-bit and larger memory access must be at least 4-byte aligned

Tom Stellard tom at stellard.net
Fri Jan 30 15:49:52 PST 2015


On Fri, Jan 30, 2015 at 02:17:40PM -0800, Matt Arsenault wrote:
> 
> > On Jan 30, 2015, at 12:56 PM, Tom Stellard <thomas.stellard at amd.com> wrote:
> > 
> > ---
> > lib/Target/R600/SIISelLowering.cpp   |  2 +-
> > test/CodeGen/R600/cvt_f32_ubyte.ll   |  2 +-
> > test/CodeGen/R600/misaligned-load.ll | 20 ++++++++++++++++++--
> > 3 files changed, 20 insertions(+), 4 deletions(-)
> > 
> > diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
> > index 79ddede..8545dd1 100644
> > --- a/lib/Target/R600/SIISelLowering.cpp
> > +++ b/lib/Target/R600/SIISelLowering.cpp
> > @@ -333,7 +333,7 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
> >   // This applies to private, global, and constant memory.
> >   if (IsFast)
> >     *IsFast = true;
> > -  return VT.bitsGT(MVT::i32);
> > +  return VT.bitsGT(MVT::i32) && Align % 4 == 0;
> > }
> 
> 
> LGTM, but I don’t think this is true on CI+
> 

I tested this on CI, and byte-aligned 64-bit mubuf loads don't work.  Maybe
flat instructions can do unaligned loads?


-Tom

> 
> > 
> > EVT SITargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
> > diff --git a/test/CodeGen/R600/cvt_f32_ubyte.ll b/test/CodeGen/R600/cvt_f32_ubyte.ll
> > index 710a400..4d4bf93 100644
> > --- a/test/CodeGen/R600/cvt_f32_ubyte.ll
> > +++ b/test/CodeGen/R600/cvt_f32_ubyte.ll
> > @@ -146,7 +146,7 @@ define void @load_v7i8_to_v7f32(<7 x float> addrspace(1)* noalias %out, <7 x i8>
> > ; SI: buffer_store_dword
> > ; SI: buffer_store_dword
> > define void @load_v8i8_to_v8f32(<8 x float> addrspace(1)* noalias %out, <8 x i8> addrspace(1)* noalias %in) nounwind {
> > -  %load = load <8 x i8> addrspace(1)* %in, align 1
> > +  %load = load <8 x i8> addrspace(1)* %in, align 8
> >   %cvt = uitofp <8 x i8> %load to <8 x float>
> >   store <8 x float> %cvt, <8 x float> addrspace(1)* %out, align 16
> >   ret void
> > diff --git a/test/CodeGen/R600/misaligned-load.ll b/test/CodeGen/R600/misaligned-load.ll
> > index 6290ca0..d7f9f83 100644
> > --- a/test/CodeGen/R600/misaligned-load.ll
> > +++ b/test/CodeGen/R600/misaligned-load.ll
> > @@ -1,6 +1,6 @@
> > ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
> > 
> > -; SI: @byte_aligned_load64
> > +; SI-LABEL: {{^}}byte_aligned_load64_local:
> > ; SI: ds_read_u8
> > ; SI: ds_read_u8
> > ; SI: ds_read_u8
> > @@ -10,9 +10,25 @@
> > ; SI: ds_read_u8
> > ; SI: ds_read_u8
> > ; SI: s_endpgm
> > -define void @byte_aligned_load64(i64 addrspace(1)* %out, i64 addrspace(3)* %in) {
> > +define void @byte_aligned_load64_local(i64 addrspace(1)* %out, i64 addrspace(3)* %in) {
> > entry:
> >   %0 = load i64 addrspace(3)* %in, align 1
> >   store i64 %0, i64 addrspace(1)* %out
> >   ret void
> > }
> > +
> > +; SI-LABEL: {{^}}byte_aligned_load64_global:
> > +; SI: buffer_load_ubyte
> > +; SI: buffer_load_ubyte
> > +; SI: buffer_load_ubyte
> > +; SI: buffer_load_ubyte
> > +; SI: buffer_load_ubyte
> > +; SI: buffer_load_ubyte
> > +; SI: buffer_load_ubyte
> > +; SI: buffer_load_ubyte
> > +define void @byte_aligned_load64_global(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
> > +entry:
> > +  %0 = load i64 addrspace(1)* %in, align 1
> > +  store i64 %0, i64 addrspace(1)* %out
> > +  ret void
> > +}
> > -- 
> > 2.0.4
> > 
> > _______________________________________________
> > llvm-commits mailing list
> > llvm-commits at cs.uiuc.edu
> > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
