[llvm] r280298 - AMDGPU: Fix introducing stack access on unaligned v16i8
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 31 14:52:28 PDT 2016
Author: arsenm
Date: Wed Aug 31 16:52:27 2016
New Revision: 280298
URL: http://llvm.org/viewvc/llvm-project?rev=280298&view=rev
Log:
AMDGPU: Fix introducing stack access on unaligned v16i8
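For context, a minimal sketch of the kind of access this change is about (the kernel name here is illustrative; the new tests added below exercise the same pattern): a <16 x i8> load from LDS with align 1, which allowsMisalignedMemoryAccesses rejects, so the combine has to lower it by hand.

  define void @copy_unaligned_v16i8(<16 x i8> addrspace(1)* %out,
                                    <16 x i8> addrspace(3)* %in) {
    %ld = load <16 x i8>, <16 x i8> addrspace(3)* %in, align 1
    store <16 x i8> %ld, <16 x i8> addrspace(1)* %out
    ret void
  }

Previously the rejected vector access went through expandUnalignedLoad/expandUnalignedStore, and the byte pack/unpack code that produces was not cleaned up afterwards, so the copy ended up going through scratch (the private stack); the ScratchSize: 0 checks in the new tests verify that no longer happens.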
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
llvm/trunk/test/CodeGen/AMDGPU/sext-in-reg-failure-r600.ll
llvm/trunk/test/CodeGen/AMDGPU/unaligned-load-store.ll
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp?rev=280298&r1=280297&r2=280298&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp Wed Aug 31 16:52:27 2016
@@ -2012,6 +2012,9 @@ SDValue AMDGPUTargetLowering::performLoa
// problems during legalization, the emitted instructions to pack and unpack
// the bytes again are not eliminated in the case of an unaligned copy.
if (!allowsMisalignedMemoryAccesses(VT, AS, Align, &IsFast)) {
+ if (VT.isVector())
+ return scalarizeVectorLoad(LN, DAG);
+
SDValue Ops[2];
std::tie(Ops[0], Ops[1]) = expandUnalignedLoad(LN, DAG);
return DAG.getMergeValues(Ops, SDLoc(N));
@@ -2060,8 +2063,12 @@ SDValue AMDGPUTargetLowering::performSto
// order problems during legalization, the emitted instructions to pack and
// unpack the bytes again are not eliminated in the case of an unaligned
// copy.
- if (!allowsMisalignedMemoryAccesses(VT, AS, Align, &IsFast))
+ if (!allowsMisalignedMemoryAccesses(VT, AS, Align, &IsFast)) {
+ if (VT.isVector())
+ return scalarizeVectorStore(SN, DAG);
+
return expandUnalignedStore(SN, DAG);
+ }
if (!IsFast)
return SDValue();
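A rough IR-level illustration of the new path (assumed shapes only; the actual transform is done on the SelectionDAG by TargetLowering::scalarizeVectorLoad / scalarizeVectorStore, and the function below is hypothetical): instead of widening the access to one big integer and shifting the pieces in and out, each element is loaded on its own and the vector is rebuilt, which the DS lowering can then turn into the per-byte ds_read_u8 / ds_write_b8 sequences checked in the tests added below. Shown for <4 x i8> for brevity:

  ; Conceptually what an align-1 <4 x i8> LDS load scalarizes to:
  define <4 x i8> @scalarized_load_sketch(<4 x i8> addrspace(3)* %p) {
    %base = bitcast <4 x i8> addrspace(3)* %p to i8 addrspace(3)*
    %p1 = getelementptr i8, i8 addrspace(3)* %base, i32 1
    %p2 = getelementptr i8, i8 addrspace(3)* %base, i32 2
    %p3 = getelementptr i8, i8 addrspace(3)* %base, i32 3
    %e0 = load i8, i8 addrspace(3)* %base, align 1
    %e1 = load i8, i8 addrspace(3)* %p1, align 1
    %e2 = load i8, i8 addrspace(3)* %p2, align 1
    %e3 = load i8, i8 addrspace(3)* %p3, align 1
    %v0 = insertelement <4 x i8> undef, i8 %e0, i32 0
    %v1 = insertelement <4 x i8> %v0, i8 %e1, i32 1
    %v2 = insertelement <4 x i8> %v1, i8 %e2, i32 2
    %v3 = insertelement <4 x i8> %v2, i8 %e3, i32 3
    ret <4 x i8> %v3
  }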
Modified: llvm/trunk/test/CodeGen/AMDGPU/sext-in-reg-failure-r600.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sext-in-reg-failure-r600.ll?rev=280298&r1=280297&r2=280298&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sext-in-reg-failure-r600.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sext-in-reg-failure-r600.ll Wed Aug 31 16:52:27 2016
@@ -1,15 +1,13 @@
-; XFAIL: *
-; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s
-; XUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG %s
+; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG %s
;
; EG-LABEL: {{^}}sext_in_reg_v2i1_in_v2i32_other_amount:
-; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
+; EG: MEM_{{.*}} MSKOR [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG-NOT: BFE
; EG: ADD_INT
; EG: LSHL
-; EG: ASHR [[RES]]
+; EG: ASHR
; EG: LSHL
-; EG: ASHR [[RES]]
+; EG: ASHR
; EG: LSHR {{\*?}} [[ADDR]]
; Works with the align 2 removed
Modified: llvm/trunk/test/CodeGen/AMDGPU/unaligned-load-store.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/unaligned-load-store.ll?rev=280298&r1=280297&r2=280298&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/unaligned-load-store.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/unaligned-load-store.ll Wed Aug 31 16:52:27 2016
@@ -552,4 +552,53 @@ define void @constant_align4_merge_load_
ret void
}
+; SI-LABEL: {{^}}local_load_align1_v16i8:
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+
+; SI: ScratchSize: 0{{$}}
+define void @local_load_align1_v16i8(<16 x i8> addrspace(1)* %out, <16 x i8> addrspace(3)* %in) #0 {
+ %ld = load <16 x i8>, <16 x i8> addrspace(3)* %in, align 1
+ store <16 x i8> %ld, <16 x i8> addrspace(1)* %out
+ ret void
+}
+
+; SI-LABEL: {{^}}local_store_align1_v16i8:
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+
+; SI: ScratchSize: 0{{$}}
+define void @local_store_align1_v16i8(<16 x i8> addrspace(3)* %out) #0 {
+ store <16 x i8> zeroinitializer, <16 x i8> addrspace(3)* %out, align 1
+ ret void
+}
+
attributes #0 = { nounwind }