[PATCH] D18028: DAGCombiner: Relax alignment restriction when changing store type
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 9 21:05:38 PST 2016
arsenm created this revision.
arsenm added a subscriber: llvm-commits.
Herald added a reviewer: tstellarAMD.
Herald added a subscriber: aemerson.
If the target allows the alignment, this should be OK. This is the partner change to D17306.
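Condensed, the new check amounts to the following (a sketch; the helper wrapper and its name are made up for illustration, but the allowsMemoryAccess() call matches the diff below):
```
// Sketch of the relaxed condition: instead of requiring the ABI alignment
// of the new store type SVT, ask the target whether an SVT access at the
// store's existing alignment is supported and fast.
static bool storeTypeChangeIsFast(const TargetLowering &TLI,
                                  SelectionDAG &DAG, EVT SVT,
                                  const StoreSDNode *ST) {
  bool Fast = false;
  return TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), SVT,
                                ST->getAddressSpace(), ST->getAlignment(),
                                &Fast) &&
         Fast;
}
```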
This currently breaks one ARM test (test/CodeGen/ARM/vector-store.ll); it looks to me like allowsMisalignedMemoryAccesses is incorrectly reporting the access as fast.
The changes all look like this:
```
define void @store_v8i8_update(<8 x i8>** %ptr, <8 x i8> %val) {
;CHECK-LABEL: store_v8i8_update:
;CHECK: vst1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
%A = load <8 x i8>*, <8 x i8>** %ptr
store <8 x i8> %val, <8 x i8>* %A, align 1
%inc = getelementptr <8 x i8>, <8 x i8>* %A, i32 1
store <8 x i8>* %inc, <8 x i8>** %ptr
ret void
}
```
DAG at replacement time:
```
SelectionDAG has 17 nodes:
t0: ch = EntryToken
t2: i32,ch = CopyFromReg t0, Register:i32 %vreg0
t9: i32 = Constant<0>
t11: i32,ch = load<LD4[%ptr]> t0, t2, undef:i32
t4: i32,ch = CopyFromReg t0, Register:i32 %vreg1
t6: i32,ch = CopyFromReg t0, Register:i32 %vreg2
t7: f64 = ARMISD::VMOVDRR t4, t6
t8: v8i8 = bitcast t7
t12: ch = store<ST8[%A](align=1)> t11:1, t8, t11, undef:i32
t14: i32 = add t11, Constant:i32<8>
t15: ch = store<ST4[%ptr]> t12, t14, t2, undef:i32
t16: ch = ARMISD::RET_FLAG t15
```
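Here t12 stores t8, the v8i8 bitcast of the f64 value t7. Once the relaxed check passes, the combine stores t7 directly as an f64, reusing the original chain, pointer, and 1-byte alignment; roughly (a sketch using the surrounding combine's variable names, not a verbatim quote of the patch):
```
// Sketch: replace the store of the bitcast (t8) with a store of its
// source operand (t7), preserving the original 1-byte alignment.
return DAG.getStore(ST->getChain(), SDLoc(N), Value.getOperand(0),
                    ST->getBasePtr(), ST->getPointerInfo(), ST->isVolatile(),
                    ST->isNonTemporal(), OrigAlign, ST->getAAInfo());
```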
Before:
```
_store_v8i8_update:
@ BB#0:
vmov d16, r1, r2
ldr r1, [r0]
vst1.8 {d16}, [r1]!
str r1, [r0]
bx lr
```
After:
```
_store_v8i8_update:
@ BB#0:
vmov d16, r1, r2
ldr r1, [r0]
vst1.64 {d16}, [r1]!
str r1, [r0]
bx lr
```
It also breaks 2 other X86 tests, both of which appear to be i1-vector related:

- X86/avx-vextractf128.ll, where a `vxorps %xmm0, %xmm0, %xmm0` is replaced with a `vxorps %ymm0, %ymm0, %ymm0`
- X86/avx512-mask-op.ll, where an extra move is introduced
http://reviews.llvm.org/D18028
Files:
include/llvm/Target/TargetLowering.h
lib/CodeGen/SelectionDAG/DAGCombiner.cpp
test/CodeGen/AMDGPU/reduce-store-width-alignment.ll
Index: test/CodeGen/AMDGPU/reduce-store-width-alignment.ll
===================================================================
--- test/CodeGen/AMDGPU/reduce-store-width-alignment.ll
+++ test/CodeGen/AMDGPU/reduce-store-width-alignment.ll
@@ -3,7 +3,7 @@
; GCN-LABEL: {{^}}store_v2i32_as_v4i16_align_4:
; GCN: s_load_dwordx2
; GCN: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset1:1{{$}}
-define void @store_v2i32_as_v4i16_align_4(<4 x i16> addrspace(3)* align 4 %out, <2 x i32> %x) {
+define void @store_v2i32_as_v4i16_align_4(<4 x i16> addrspace(3)* align 4 %out, <2 x i32> %x) #0 {
%x.bc = bitcast <2 x i32> %x to <4 x i16>
store <4 x i16> %x.bc, <4 x i16> addrspace(3)* %out, align 4
ret void
@@ -13,27 +13,26 @@
; GCN: s_load_dwordx4
; GCN-DAG: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:2 offset1:3
; GCN-DAG: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset1:1{{$}}
-define void @store_v4i32_as_v8i16_align_4(<8 x i16> addrspace(3)* align 4 %out, <4 x i32> %x) {
+define void @store_v4i32_as_v8i16_align_4(<8 x i16> addrspace(3)* align 4 %out, <4 x i32> %x) #0 {
%x.bc = bitcast <4 x i32> %x to <8 x i16>
store <8 x i16> %x.bc, <8 x i16> addrspace(3)* %out, align 4
ret void
}
; GCN-LABEL: {{^}}store_v2i32_as_i64_align_4:
; GCN: s_load_dwordx2
; GCN: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset1:1{{$}}
-define void @store_v2i32_as_i64_align_4(<4 x i16> addrspace(3)* align 4 %out, <2 x i32> %x) {
+define void @store_v2i32_as_i64_align_4(<4 x i16> addrspace(3)* align 4 %out, <2 x i32> %x) #0 {
%x.bc = bitcast <2 x i32> %x to <4 x i16>
store <4 x i16> %x.bc, <4 x i16> addrspace(3)* %out, align 4
ret void
}
-
; GCN-LABEL: {{^}}store_v4i32_as_v2i64_align_4:
; GCN: s_load_dwordx4
; GCN-DAG: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:2 offset1:3
; GCN-DAG: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset1:1{{$}}
-define void @store_v4i32_as_v2i64_align_4(<2 x i64> addrspace(3)* align 4 %out, <4 x i32> %x) {
+define void @store_v4i32_as_v2i64_align_4(<2 x i64> addrspace(3)* align 4 %out, <4 x i32> %x) #0 {
%x.bc = bitcast <4 x i32> %x to <2 x i64>
store <2 x i64> %x.bc, <2 x i64> addrspace(3)* %out, align 4
ret void
@@ -45,8 +44,10 @@
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset1:1{{$}}
-define void @store_v4i16_as_v2i32_align_4(<2 x i32> addrspace(3)* align 4 %out, <4 x i16> %x) {
+define void @store_v4i16_as_v2i32_align_4(<2 x i32> addrspace(3)* align 4 %out, <4 x i16> %x) #0 {
%x.bc = bitcast <4 x i16> %x to <2 x i32>
store <2 x i32> %x.bc, <2 x i32> addrspace(3)* %out, align 4
ret void
}
+
+attributes #0 = { nounwind }
Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -11864,14 +11864,11 @@
if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
ST->isUnindexed()) {
EVT SVT = Value.getOperand(0).getValueType();
-
- if ((!LegalOperations && !ST->isVolatile()) ||
- TLI.isOperationLegalOrCustom(ISD::STORE, SVT)) {
+ if (((!LegalOperations && !ST->isVolatile()) ||
+ TLI.isOperationLegalOrCustom(ISD::STORE, SVT)) &&
+ TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT)) {
unsigned OrigAlign = ST->getAlignment();
- unsigned Align = DAG.getDataLayout().getABITypeAlignment(
- SVT.getTypeForEVT(*DAG.getContext()));
-
- bool Fast;
+ bool Fast = false;
if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), SVT,
ST->getAddressSpace(), OrigAlign, &Fast) &&
Fast) {
Index: include/llvm/Target/TargetLowering.h
===================================================================
--- include/llvm/Target/TargetLowering.h
+++ include/llvm/Target/TargetLowering.h
@@ -271,6 +271,15 @@
return true;
}
+ /// isStoreBitCastBeneficial() - Mirror of isLoadBitCastBeneficial(). Return
+ /// true if the following transform is beneficial.
+ ///
+ /// (store (y (conv x)), y*) -> (store x, (x*))
+ virtual bool isStoreBitCastBeneficial(EVT StoreVT, EVT BitcastVT) const {
+ // Default to the same logic as loads.
+ return isLoadBitCastBeneficial(StoreVT, BitcastVT);
+ }
+
/// Return true if it is expected to be cheaper to do a store of a non-zero
/// vector constant with the given size and type for the address space than to
/// store the individual scalar element constants.
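Targets that want store-side profitability to differ from the load-side hook can override this. A minimal sketch of such an override, with a hypothetical MyTargetLowering subclass and heuristic (neither is part of this patch):
```
// Hypothetical override: only allow the bitcast fold when the pre-bitcast
// type's elements are at least 32 bits wide. StoreVT is the type currently
// stored; BitcastVT is the source type the store would be rewritten to.
bool MyTargetLowering::isStoreBitCastBeneficial(EVT StoreVT,
                                                EVT BitcastVT) const {
  if (!BitcastVT.isVector())
    return true;
  return BitcastVT.getScalarType().getSizeInBits() >= 32;
}
```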