[PATCH] D45053: AMDGPU: Fix selection error on constant loads with < 4 byte alignment
Matt Arsenault via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 29 10:23:44 PDT 2018
arsenm created this revision.
arsenm added reviewers: rampitec, FarhanaAleen, alex-t.
Herald added subscribers: t-tye, tpr, dstuttard, yaxunl, nhaehnle, wdng, kzhuravl.
https://reviews.llvm.org/D45053
Files:
lib/Target/AMDGPU/SIISelLowering.cpp
test/CodeGen/AMDGPU/load-constant-i16.ll
test/CodeGen/AMDGPU/load-global-i16.ll
Index: test/CodeGen/AMDGPU/load-global-i16.ll
===================================================================
--- test/CodeGen/AMDGPU/load-global-i16.ll
+++ test/CodeGen/AMDGPU/load-global-i16.ll
@@ -83,6 +83,18 @@
ret void
}
+; GCN-LABEL: {{^}}global_load_v16i16_align2:
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+define amdgpu_kernel void @global_load_v16i16_align2(<16 x i16> addrspace(1)* %in, <16 x i16> addrspace(1)* %out) #0 {
+entry:
+ %ld = load <16 x i16>, <16 x i16> addrspace(1)* %in, align 2
+ store <16 x i16> %ld, <16 x i16> addrspace(1)* %out, align 32
+ ret void
+}
+
; FUNC-LABEL: {{^}}global_zextload_i16_to_i32:
; GCN-NOHSA: buffer_load_ushort
; GCN-NOHSA: buffer_store_dword
Index: test/CodeGen/AMDGPU/load-constant-i16.ll
===================================================================
--- test/CodeGen/AMDGPU/load-constant-i16.ll
+++ test/CodeGen/AMDGPU/load-constant-i16.ll
@@ -72,6 +72,18 @@
ret void
}
+; FUNC-LABEL: {{^}}constant_load_v16i16_align2:
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+define amdgpu_kernel void @constant_load_v16i16_align2(<16 x i16> addrspace(4)* %ptr0) #0 {
+entry:
+ %ld = load <16 x i16>, <16 x i16> addrspace(4)* %ptr0, align 2
+ store <16 x i16> %ld, <16 x i16> addrspace(1)* undef, align 32
+ ret void
+}
+
; FUNC-LABEL: {{^}}constant_zextload_i16_to_i32:
; GCN-NOHSA: buffer_load_ushort
; GCN-NOHSA: buffer_store_dword
Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -3464,10 +3464,6 @@
return false;
}
-static bool isDwordAligned(unsigned Alignment) {
- return Alignment % 4 == 0;
-}
-
//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//
@@ -5385,21 +5381,23 @@
AMDGPUASI.PRIVATE_ADDRESS : AMDGPUASI.GLOBAL_ADDRESS;
unsigned NumElements = MemVT.getVectorNumElements();
+
if (AS == AMDGPUASI.CONSTANT_ADDRESS ||
AS == AMDGPUASI.CONSTANT_ADDRESS_32BIT) {
- if (!Op->isDivergent())
+ if (!Op->isDivergent() && Alignment >= 4)
return SDValue();
// Non-uniform loads will be selected to MUBUF instructions, so they
// have the same legalization requirements as global and private
// loads.
//
}
+
if (AS == AMDGPUASI.CONSTANT_ADDRESS ||
AS == AMDGPUASI.CONSTANT_ADDRESS_32BIT ||
AS == AMDGPUASI.GLOBAL_ADDRESS) {
if (Subtarget->getScalarizeGlobalBehavior() && !Op->isDivergent() &&
!Load->isVolatile() && isMemOpHasNoClobberedMemOperand(Load) &&
- isDwordAligned(Alignment))
+ Alignment >= 4)
return SDValue();
// Non-uniform loads will be selected to MUBUF instructions, so they
// have the same legalization requirements as global and private
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D45053.140279.patch
Type: text/x-patch
Size: 3121 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20180329/aabf0c02/attachment.bin>
More information about the llvm-commits mailing list