[llvm-branch-commits] [llvm-branch] r235206 - Merging r227822:
Tom Stellard
thomas.stellard at amd.com
Fri Apr 17 09:59:30 PDT 2015
Author: tstellar
Date: Fri Apr 17 11:59:29 2015
New Revision: 235206
URL: http://llvm.org/viewvc/llvm-project?rev=235206&view=rev
Log:
Merging r227822:
------------------------------------------------------------------------
r227822 | thomas.stellard | 2015-02-02 13:02:28 -0500 (Mon, 02 Feb 2015) | 6 lines
R600/SI: 64-bit and larger memory accesses must be at least 4-byte aligned
This is true for SI only. CI+ supports unaligned memory accesses,
but this requires driver support, so for now we disallow unaligned
accesses for all GCN targets.
------------------------------------------------------------------------
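The rule the merged change enforces is small: on SI, a misaligned access is
left intact only when it is wider than 32 bits and its alignment is a
multiple of 4 bytes. A minimal standalone C++ sketch of that predicate (the
helper name and driver below are illustrative only, not LLVM API):

#include <cstdio>

// Mirrors the patched check in SITargetLowering::allowsMisalignedMemoryAccesses:
// VT.bitsGT(MVT::i32) && Align % 4 == 0.
static bool allowsMisaligned(unsigned BitWidth, unsigned Align) {
  return BitWidth > 32 && Align % 4 == 0;
}

int main() {
  printf("%d\n", allowsMisaligned(64, 4)); // 1: i64 at align 4 stays one wide load
  printf("%d\n", allowsMisaligned(64, 1)); // 0: i64 at align 1 gets expanded
  printf("%d\n", allowsMisaligned(32, 1)); // 0: 32-bit and smaller are rejected here
  return 0;
}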
Modified:
llvm/branches/release_36/lib/Target/R600/SIISelLowering.cpp
llvm/branches/release_36/test/CodeGen/R600/cvt_f32_ubyte.ll
llvm/branches/release_36/test/CodeGen/R600/unaligned-load-store.ll
Modified: llvm/branches/release_36/lib/Target/R600/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_36/lib/Target/R600/SIISelLowering.cpp?rev=235206&r1=235205&r2=235206&view=diff
==============================================================================
--- llvm/branches/release_36/lib/Target/R600/SIISelLowering.cpp (original)
+++ llvm/branches/release_36/lib/Target/R600/SIISelLowering.cpp Fri Apr 17 11:59:29 2015
@@ -314,9 +314,8 @@ bool SITargetLowering::allowsMisalignedM
if (!VT.isSimple() || VT == MVT::Other)
return false;
- // XXX - CI changes say "Support for unaligned memory accesses" but I don't
- // see what for specifically. The wording everywhere else seems to be the
- // same.
+ // TODO - CI+ supports unaligned memory accesses, but this requires driver
+ // support.
// XXX - The only mention I see of this in the ISA manual is for LDS direct
// reads the "byte address and must be dword aligned". Is it also true for the
@@ -333,7 +332,8 @@ bool SITargetLowering::allowsMisalignedM
// This applies to private, global, and constant memory.
if (IsFast)
*IsFast = true;
- return VT.bitsGT(MVT::i32);
+
+ return VT.bitsGT(MVT::i32) && Align % 4 == 0;
}
EVT SITargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
Modified: llvm/branches/release_36/test/CodeGen/R600/cvt_f32_ubyte.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_36/test/CodeGen/R600/cvt_f32_ubyte.ll?rev=235206&r1=235205&r2=235206&view=diff
==============================================================================
--- llvm/branches/release_36/test/CodeGen/R600/cvt_f32_ubyte.ll (original)
+++ llvm/branches/release_36/test/CodeGen/R600/cvt_f32_ubyte.ll Fri Apr 17 11:59:29 2015
@@ -146,7 +146,7 @@ define void @load_v7i8_to_v7f32(<7 x flo
; SI: buffer_store_dword
; SI: buffer_store_dword
define void @load_v8i8_to_v8f32(<8 x float> addrspace(1)* noalias %out, <8 x i8> addrspace(1)* noalias %in) nounwind {
- %load = load <8 x i8> addrspace(1)* %in, align 1
+ %load = load <8 x i8> addrspace(1)* %in, align 8
%cvt = uitofp <8 x i8> %load to <8 x float>
store <8 x float> %cvt, <8 x float> addrspace(1)* %out, align 16
ret void
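The align 1 -> align 8 bump above follows from the new predicate: a <8 x i8>
load is a 64-bit access, so at align 1 it would now be split into byte loads
and the test would stop exercising the cvt_f32_ubyte pattern. A quick
standalone check of that arithmetic (illustrative only):

#include <cassert>

int main() {
  const unsigned Bits = 8 * 8;        // <8 x i8> is a 64-bit access
  assert(!(Bits > 32 && 1 % 4 == 0)); // align 1: expanded byte-by-byte now
  assert(Bits > 32 && 8 % 4 == 0);    // align 8: kept as a single wide load
  return 0;
}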
Modified: llvm/branches/release_36/test/CodeGen/R600/unaligned-load-store.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_36/test/CodeGen/R600/unaligned-load-store.ll?rev=235206&r1=235205&r2=235206&view=diff
==============================================================================
--- llvm/branches/release_36/test/CodeGen/R600/unaligned-load-store.ll (original)
+++ llvm/branches/release_36/test/CodeGen/R600/unaligned-load-store.ll Fri Apr 17 11:59:29 2015
@@ -1,18 +1,65 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
-; SI-LABEL: {{^}}unaligned_load_store_i32:
+; SI-LABEL: {{^}}unaligned_load_store_i32_local:
+; SI: ds_read_u8
+; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_write_b32
; SI: s_endpgm
-define void @unaligned_load_store_i32(i32 addrspace(3)* %p, i32 addrspace(3)* %r) nounwind {
+define void @unaligned_load_store_i32_local(i32 addrspace(3)* %p, i32 addrspace(3)* %r) nounwind {
%v = load i32 addrspace(3)* %p, align 1
store i32 %v, i32 addrspace(3)* %r, align 1
ret void
}
-; SI-LABEL: {{^}}unaligned_load_store_v4i32:
+; SI-LABEL: {{^}}unaligned_load_store_i32_global:
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_store_dword
+define void @unaligned_load_store_i32_global(i32 addrspace(1)* %p, i32 addrspace(1)* %r) nounwind {
+ %v = load i32 addrspace(1)* %p, align 1
+ store i32 %v, i32 addrspace(1)* %r, align 1
+ ret void
+}
+
+; SI-LABEL: {{^}}unaligned_load_store_i64_local:
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_write2_b32
+; SI: s_endpgm
+define void @unaligned_load_store_i64_local(i64 addrspace(3)* %p, i64 addrspace(3)* %r) {
+ %v = load i64 addrspace(3)* %p, align 1
+ store i64 %v, i64 addrspace(3)* %r, align 1
+ ret void
+}
+
+; SI-LABEL: {{^}}unaligned_load_store_i64_global:
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_store_dwordx2
+define void @unaligned_load_store_i64_global(i64 addrspace(1)* %p, i64 addrspace(1)* %r) {
+ %v = load i64 addrspace(1)* %p, align 1
+ store i64 %v, i64 addrspace(1)* %r, align 1
+ ret void
+}
+
+; SI-LABEL: {{^}}unaligned_load_store_v4i32_local:
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
@@ -38,12 +85,36 @@ define void @unaligned_load_store_i32(i3
; SI: ds_write_b32
; SI: ds_write_b32
; SI: s_endpgm
-define void @unaligned_load_store_v4i32(<4 x i32> addrspace(3)* %p, <4 x i32> addrspace(3)* %r) nounwind {
+define void @unaligned_load_store_v4i32_local(<4 x i32> addrspace(3)* %p, <4 x i32> addrspace(3)* %r) nounwind {
%v = load <4 x i32> addrspace(3)* %p, align 1
store <4 x i32> %v, <4 x i32> addrspace(3)* %r, align 1
ret void
}
+; FIXME: We mark v4i32 as custom, so misaligned loads are never expanded.
+; FIXME-SI-LABEL: {{^}}unaligned_load_store_v4i32_global
+; FIXME-SI: buffer_load_ubyte
+; FIXME-SI: buffer_load_ubyte
+; FIXME-SI: buffer_load_ubyte
+; FIXME-SI: buffer_load_ubyte
+; FIXME-SI: buffer_load_ubyte
+; FIXME-SI: buffer_load_ubyte
+; FIXME-SI: buffer_load_ubyte
+; FIXME-SI: buffer_load_ubyte
+; FIXME-SI: buffer_load_ubyte
+; FIXME-SI: buffer_load_ubyte
+; FIXME-SI: buffer_load_ubyte
+; FIXME-SI: buffer_load_ubyte
+; FIXME-SI: buffer_load_ubyte
+; FIXME-SI: buffer_load_ubyte
+; FIXME-SI: buffer_load_ubyte
+; FIXME-SI: buffer_load_ubyte
+define void @unaligned_load_store_v4i32_global(<4 x i32> addrspace(1)* %p, <4 x i32> addrspace(1)* %r) nounwind {
+ %v = load <4 x i32> addrspace(1)* %p, align 1
+ store <4 x i32> %v, <4 x i32> addrspace(1)* %r, align 1
+ ret void
+}
+
; SI-LABEL: {{^}}load_lds_i64_align_4:
; SI: ds_read2_b32
; SI: s_endpgm
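The ds_read_u8 / buffer_load_ubyte runs counted by the new checks are the
legalized form of a misaligned wide access: eight single-byte loads shifted
and OR'd back into the 64-bit value. A standalone sketch of that reassembly
(little-endian; names are illustrative only):

#include <cstdint>
#include <cstdio>

// Rebuild a u64 from individually loaded bytes, least-significant first,
// the way the expanded byte-load sequences above do.
static uint64_t loadUnalignedU64(const uint8_t *p) {
  uint64_t v = 0;
  for (int i = 0; i < 8; ++i)
    v |= (uint64_t)p[i] << (8 * i); // one byte load per iteration
  return v;
}

int main() {
  uint8_t buf[9] = {0, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88};
  // Start at buf + 1: a deliberately misaligned 64-bit read.
  printf("0x%llx\n", (unsigned long long)loadUnalignedU64(buf + 1));
  return 0;
}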