[PATCH] R600/SI: 64-bit and larger memory access must be at least 4-byte aligned

Fri Jan 30 12:56:06 PST 2015

---
 lib/Target/R600/SIISelLowering.cpp   |  2 +-
 test/CodeGen/R600/cvt_f32_ubyte.ll   |  2 +-
 test/CodeGen/R600/misaligned-load.ll | 20 ++++++++++++++++++--
 3 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
index 79ddede..8545dd1 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -333,7 +333,7 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
   // This applies to private, global, and constant memory.
   if (IsFast)
     *IsFast = true;
-  return VT.bitsGT(MVT::i32);
+  return VT.bitsGT(MVT::i32) && Align % 4 == 0;
 }
 
 EVT SITargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
diff --git a/test/CodeGen/R600/cvt_f32_ubyte.ll b/test/CodeGen/R600/cvt_f32_ubyte.ll
index 710a400..4d4bf93 100644
--- a/test/CodeGen/R600/cvt_f32_ubyte.ll
+++ b/test/CodeGen/R600/cvt_f32_ubyte.ll
@@ -146,7 +146,7 @@ define void @load_v7i8_to_v7f32(<7 x float> addrspace(1)* noalias %out, <7 x i8>
 ; SI: buffer_store_dword
 ; SI: buffer_store_dword
 define void @load_v8i8_to_v8f32(<8 x float> addrspace(1)* noalias %out, <8 x i8> addrspace(1)* noalias %in) nounwind {
-  %load = load <8 x i8> addrspace(1)* %in, align 1
+  %load = load <8 x i8> addrspace(1)* %in, align 8
   %cvt = uitofp <8 x i8> %load to <8 x float>
   store <8 x float> %cvt, <8 x float> addrspace(1)* %out, align 16
   ret void
diff --git a/test/CodeGen/R600/misaligned-load.ll b/test/CodeGen/R600/misaligned-load.ll
index 6290ca0..d7f9f83 100644
--- a/test/CodeGen/R600/misaligned-load.ll
+++ b/test/CodeGen/R600/misaligned-load.ll
@@ -1,6 +1,6 @@
 ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
 
-; SI: @byte_aligned_load64
+; SI-LABEL: {{^}}byte_aligned_load64_local:
 ; SI: ds_read_u8
 ; SI: ds_read_u8
 ; SI: ds_read_u8
@@ -10,9 +10,25 @@
 ; SI: ds_read_u8
 ; SI: ds_read_u8
 ; SI: s_endpgm
-define void @byte_aligned_load64(i64 addrspace(1)* %out, i64 addrspace(3)* %in) {
+define void @byte_aligned_load64_local(i64 addrspace(1)* %out, i64 addrspace(3)* %in) {
 entry:
   %0 = load i64 addrspace(3)* %in, align 1
   store i64 %0, i64 addrspace(1)* %out
   ret void
 }
+
+; SI-LABEL: {{^}}byte_aligned_load64_global:
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+define void @byte_aligned_load64_global(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
+entry:
+  %0 = load i64 addrspace(1)* %in, align 1
+  store i64 %0, i64 addrspace(1)* %out
+  ret void
+}
-- 
2.0.4