[PATCH] D18028: DAGCombiner: Relax alignment restriction when changing store type
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 9 21:05:38 PST 2016
arsenm created this revision.
arsenm added a subscriber: llvm-commits.
Herald added a reviewer: tstellarAMD.
Herald added a subscriber: aemerson.
If the target allows the alignment, this should be OK. This is the partner change to D17306.
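Condensed, the new check amounts to the following (a sketch; the helper wrapper and its name are made up for illustration, but the allowsMemoryAccess() call matches the diff below):
```
// Sketch of the relaxed condition: instead of requiring the ABI alignment
// of the new store type SVT, ask the target whether an SVT access at the
// store's existing alignment is supported and fast.
static bool storeTypeChangeIsFast(const TargetLowering &TLI,
                                  SelectionDAG &DAG, EVT SVT,
                                  const StoreSDNode *ST) {
  bool Fast = false;
  return TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), SVT,
                                ST->getAddressSpace(), ST->getAlignment(),
                                &Fast) &&
         Fast;
}
```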
This currently breaks one ARM test (test/CodeGen/ARM/vector-store.ll); it looks to me like allowsMisalignedMemoryAccesses is incorrectly reporting the access as fast.
The changes all look like this:
```
define void @store_v8i8_update(<8 x i8>** %ptr, <8 x i8> %val) {
;CHECK-LABEL: store_v8i8_update:
;CHECK: vst1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
%A = load <8 x i8>*, <8 x i8>** %ptr
store <8 x i8> %val, <8 x i8>* %A, align 1
%inc = getelementptr <8 x i8>, <8 x i8>* %A, i32 1
store <8 x i8>* %inc, <8 x i8>** %ptr
ret void
}
```
DAG at replacement time:
```
SelectionDAG has 17 nodes:
t0: ch = EntryToken
t2: i32,ch = CopyFromReg t0, Register:i32 %vreg0
t9: i32 = Constant<0>
t11: i32,ch = load<LD4[%ptr]> t0, t2, undef:i32
t4: i32,ch = CopyFromReg t0, Register:i32 %vreg1
t6: i32,ch = CopyFromReg t0, Register:i32 %vreg2
t7: f64 = ARMISD::VMOVDRR t4, t6
t8: v8i8 = bitcast t7
t12: ch = store<ST8[%A](align=1)> t11:1, t8, t11, undef:i32
t14: i32 = add t11, Constant:i32<8>
t15: ch = store<ST4[%ptr]> t12, t14, t2, undef:i32
t16: ch = ARMISD::RET_FLAG t15
```
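Here t12 stores t8, the v8i8 bitcast of the f64 value t7. Once the relaxed check passes, the combine stores t7 directly as an f64, reusing the original chain, pointer, and 1-byte alignment; roughly (a sketch using the surrounding combine's variable names, not a verbatim quote of the patch):
```
// Sketch: replace the store of the bitcast (t8) with a store of its
// source operand (t7), preserving the original 1-byte alignment.
return DAG.getStore(ST->getChain(), SDLoc(N), Value.getOperand(0),
                    ST->getBasePtr(), ST->getPointerInfo(), ST->isVolatile(),
                    ST->isNonTemporal(), OrigAlign, ST->getAAInfo());
```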
Before:
```
_store_v8i8_update:
@ BB#0:
vmov d16, r1, r2
ldr r1, [r0]
vst1.8 {d16}, [r1]!
str r1, [r0]
bx lr
```
After:
```
_store_v8i8_update:
@ BB#0:
vmov d16, r1, r2
ldr r1, [r0]
vst1.64 {d16}, [r1]!
str r1, [r0]
bx lr
```
It also breaks 2 other X86 tests, both of which appear to be i1-vector related:

- X86/avx-vextractf128.ll, where a `vxorps %xmm0, %xmm0, %xmm0` is replaced with a `vxorps %ymm0, %ymm0, %ymm0`
- X86/avx512-mask-op.ll, where an extra move is introduced
http://reviews.llvm.org/D18028
Files:
include/llvm/Target/TargetLowering.h
lib/CodeGen/SelectionDAG/DAGCombiner.cpp
test/CodeGen/AMDGPU/reduce-store-width-alignment.ll
Index: test/CodeGen/AMDGPU/reduce-store-width-alignment.ll
===================================================================
--- test/CodeGen/AMDGPU/reduce-store-width-alignment.ll
+++ test/CodeGen/AMDGPU/reduce-store-width-alignment.ll
@@ -3,7 +3,7 @@
; GCN-LABEL: {{^}}store_v2i32_as_v4i16_align_4:
; GCN: s_load_dwordx2
; GCN: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset1:1{{$}}
-define void @store_v2i32_as_v4i16_align_4(<4 x i16> addrspace(3)* align 4 %out, <2 x i32> %x) {
+define void @store_v2i32_as_v4i16_align_4(<4 x i16> addrspace(3)* align 4 %out, <2 x i32> %x) #0 {
%x.bc = bitcast <2 x i32> %x to <4 x i16>
store <4 x i16> %x.bc, <4 x i16> addrspace(3)* %out, align 4
ret void
@@ -13,27 +13,26 @@
; GCN: s_load_dwordx4
; GCN-DAG: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:2 offset1:3
; GCN-DAG: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset1:1{{$}}
-define void @store_v4i32_as_v8i16_align_4(<8 x i16> addrspace(3)* align 4 %out, <4 x i32> %x) {
+define void @store_v4i32_as_v8i16_align_4(<8 x i16> addrspace(3)* align 4 %out, <4 x i32> %x) #0 {
%x.bc = bitcast <4 x i32> %x to <8 x i16>
store <8 x i16> %x.bc, <8 x i16> addrspace(3)* %out, align 4
ret void
}
; GCN-LABEL: {{^}}store_v2i32_as_i64_align_4:
; GCN: s_load_dwordx2
; GCN: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset1:1{{$}}
-define void @store_v2i32_as_i64_align_4(<4 x i16> addrspace(3)* align 4 %out, <2 x i32> %x) {
+define void @store_v2i32_as_i64_align_4(<4 x i16> addrspace(3)* align 4 %out, <2 x i32> %x) #0 {
%x.bc = bitcast <2 x i32> %x to <4 x i16>
store <4 x i16> %x.bc, <4 x i16> addrspace(3)* %out, align 4
ret void
}
-
; GCN-LABEL: {{^}}store_v4i32_as_v2i64_align_4:
; GCN: s_load_dwordx4
; GCN-DAG: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:2 offset1:3
; GCN-DAG: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset1:1{{$}}
-define void @store_v4i32_as_v2i64_align_4(<2 x i64> addrspace(3)* align 4 %out, <4 x i32> %x) {
+define void @store_v4i32_as_v2i64_align_4(<2 x i64> addrspace(3)* align 4 %out, <4 x i32> %x) #0 {
%x.bc = bitcast <4 x i32> %x to <2 x i64>
store <2 x i64> %x.bc, <2 x i64> addrspace(3)* %out, align 4
ret void
@@ -45,8 +44,10 @@
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset1:1{{$}}
-define void @store_v4i16_as_v2i32_align_4(<2 x i32> addrspace(3)* align 4 %out, <4 x i16> %x) {
+define void @store_v4i16_as_v2i32_align_4(<2 x i32> addrspace(3)* align 4 %out, <4 x i16> %x) #0 {
%x.bc = bitcast <4 x i16> %x to <2 x i32>
store <2 x i32> %x.bc, <2 x i32> addrspace(3)* %out, align 4
ret void
}
+
+attributes #0 = { nounwind }
Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -11864,14 +11864,11 @@
if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
ST->isUnindexed()) {
EVT SVT = Value.getOperand(0).getValueType();
-
- if ((!LegalOperations && !ST->isVolatile()) ||
- TLI.isOperationLegalOrCustom(ISD::STORE, SVT)) {
+ if (((!LegalOperations && !ST->isVolatile()) ||
+ TLI.isOperationLegalOrCustom(ISD::STORE, SVT)) &&
+ TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT)) {
unsigned OrigAlign = ST->getAlignment();
- unsigned Align = DAG.getDataLayout().getABITypeAlignment(
- SVT.getTypeForEVT(*DAG.getContext()));
-
- bool Fast;
+ bool Fast = false;
if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), SVT,
ST->getAddressSpace(), OrigAlign, &Fast) &&
Fast) {
Index: include/llvm/Target/TargetLowering.h
===================================================================
--- include/llvm/Target/TargetLowering.h
+++ include/llvm/Target/TargetLowering.h
@@ -271,6 +271,15 @@
return true;
}
+ /// isStoreBitCastBeneficial() - Mirror of isLoadBitCastBeneficial(). Return
+ /// true if the following transform is beneficial.
+ ///
+ /// (store (y (conv x)), y*) -> (store x, (x*))
+ virtual bool isStoreBitCastBeneficial(EVT StoreVT, EVT BitcastVT) const {
+ // Default to the same logic as loads.
+ return isLoadBitCastBeneficial(StoreVT, BitcastVT);
+ }
+
/// Return true if it is expected to be cheaper to do a store of a non-zero
/// vector constant with the given size and type for the address space than to
/// store the individual scalar element constants.
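Targets that want store-side profitability to differ from the load-side hook can override this. A minimal sketch of such an override, with a hypothetical MyTargetLowering subclass and heuristic (neither is part of this patch):
```
// Hypothetical override: only allow the bitcast fold when the pre-bitcast
// type's elements are at least 32 bits wide. StoreVT is the type currently
// stored; BitcastVT is the source type the store would be rewritten to.
bool MyTargetLowering::isStoreBitCastBeneficial(EVT StoreVT,
                                                EVT BitcastVT) const {
  if (!BitcastVT.isVector())
    return true;
  return BitcastVT.getScalarType().getSizeInBits() >= 32;
}
```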