[llvm] r267217 - DAGCombiner: Relax alignment restriction when changing store type

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Fri Apr 22 14:01:42 PDT 2016


Author: arsenm
Date: Fri Apr 22 16:01:41 2016
New Revision: 267217

URL: http://llvm.org/viewvc/llvm-project?rev=267217&view=rev
Log:
DAGCombiner: Relax alignment restriction when changing store type

Previously the combine only fired when the new store type's ABI alignment
fit within the original store's alignment. That is stricter than
necessary: if the target reports via allowsMemoryAccess() that the access
is supported and fast at the original alignment, changing the store type
should be OK.
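
As a rough sketch of the condition change (the names follow the
DAGCombiner diff below; this is illustrative, not the verbatim code):

  // Old rule: only change the store type if the new type's ABI alignment
  // fit within the original store's alignment.
  //   unsigned Align = DAG.getDataLayout().getABITypeAlignment(
  //       SVT.getTypeForEVT(*DAG.getContext()));
  //   OK = Align <= ST->getAlignment();

  // New rule: ask the target whether the access is both supported and
  // fast at the original alignment.
  bool Fast = false;
  bool OK = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
                                   SVT, ST->getAddressSpace(),
                                   ST->getAlignment(), &Fast) &&
            Fast;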

Added:
    llvm/trunk/test/CodeGen/AMDGPU/reduce-store-width-alignment.ll
Modified:
    llvm/trunk/include/llvm/Target/TargetLowering.h
    llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/trunk/test/CodeGen/X86/avx-vextractf128.ll

Modified: llvm/trunk/include/llvm/Target/TargetLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetLowering.h?rev=267217&r1=267216&r2=267217&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Target/TargetLowering.h (original)
+++ llvm/trunk/include/llvm/Target/TargetLowering.h Fri Apr 22 16:01:41 2016
@@ -286,6 +286,15 @@ public:
     return true;
   }
 
+  /// isStoreBitCastBeneficial() - Mirror of isLoadBitCastBeneficial(). Return
+  /// true if the following transform is beneficial.
+  ///
+  /// (store (y (conv x)), y*) -> (store x, x*)
+  virtual bool isStoreBitCastBeneficial(EVT StoreVT, EVT BitcastVT) const {
+    // Default to the same logic as loads.
+    return isLoadBitCastBeneficial(StoreVT, BitcastVT);
+  }
+
   /// Return true if it is expected to be cheaper to do a store of a non-zero
   /// vector constant with the given size and type for the address space than to
   /// store the individual scalar element constants.
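
A target that wants store-specific profitability can override the new
hook. A minimal hypothetical sketch (MyTargetLowering and its heuristic
are illustrative, not part of this commit):

  // Reject the combine for vector types this target cannot store
  // natively; otherwise keep the shared load heuristic.
  bool MyTargetLowering::isStoreBitCastBeneficial(EVT StoreVT,
                                                  EVT BitcastVT) const {
    if (BitcastVT.isVector() && !isTypeLegal(BitcastVT))
      return false;
    return isLoadBitCastBeneficial(StoreVT, BitcastVT);
  }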

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=267217&r1=267216&r2=267217&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Fri Apr 22 16:01:41 2016
@@ -11970,17 +11970,21 @@ SDValue DAGCombiner::visitSTORE(SDNode *
   // resultant store does not need a higher alignment than the original.
   if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
       ST->isUnindexed()) {
-    unsigned OrigAlign = ST->getAlignment();
     EVT SVT = Value.getOperand(0).getValueType();
-    unsigned Align = DAG.getDataLayout().getABITypeAlignment(
-        SVT.getTypeForEVT(*DAG.getContext()));
-    if (Align <= OrigAlign &&
-        ((!LegalOperations && !ST->isVolatile()) ||
-         TLI.isOperationLegalOrCustom(ISD::STORE, SVT)))
-      return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0),
-                          Ptr, ST->getPointerInfo(), ST->isVolatile(),
-                          ST->isNonTemporal(), OrigAlign,
-                          ST->getAAInfo());
+    if (((!LegalOperations && !ST->isVolatile()) ||
+         TLI.isOperationLegalOrCustom(ISD::STORE, SVT)) &&
+        TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT)) {
+      unsigned OrigAlign = ST->getAlignment();
+      bool Fast = false;
+      if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), SVT,
+                                 ST->getAddressSpace(), OrigAlign, &Fast) &&
+          Fast) {
+        return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0),
+                            Ptr, ST->getPointerInfo(), ST->isVolatile(),
+                            ST->isNonTemporal(), OrigAlign,
+                            ST->getAAInfo());
+      }
+    }
   }
 
   // Turn 'store undef, Ptr' -> nothing.
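
Note that allowsMemoryAccess() treats any access meeting the ABI type
alignment as fast and otherwise defers to allowsMisalignedMemoryAccesses(),
so the combine now also fires for under-aligned types when the target opts
in. AMDGPU does this for 4-byte-aligned LDS accesses, which the new test
below exercises. A hypothetical opt-in sketch (MyTargetLowering and
LOCAL_ADDRESS are illustrative, not part of this commit):

  bool MyTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
                                                        unsigned AddrSpace,
                                                        unsigned Align,
                                                        bool *IsFast) const {
    // Local memory on this hypothetical target handles any dword-aligned
    // access at full speed, even below the type's ABI alignment.
    if (AddrSpace == LOCAL_ADDRESS && Align >= 4) {
      if (IsFast)
        *IsFast = true;
      return true;
    }
    return false;
  }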

Added: llvm/trunk/test/CodeGen/AMDGPU/reduce-store-width-alignment.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/reduce-store-width-alignment.ll?rev=267217&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/reduce-store-width-alignment.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/reduce-store-width-alignment.ll Fri Apr 22 16:01:41 2016
@@ -0,0 +1,53 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+; GCN-LABEL: {{^}}store_v2i32_as_v4i16_align_4:
+; GCN: s_load_dwordx2
+; GCN: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset1:1{{$}}
+define void @store_v2i32_as_v4i16_align_4(<4 x i16> addrspace(3)* align 4 %out, <2 x i32> %x) #0 {
+  %x.bc = bitcast <2 x i32> %x to <4 x i16>
+  store <4 x i16> %x.bc, <4 x i16> addrspace(3)* %out, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}store_v4i32_as_v8i16_align_4:
+; GCN: s_load_dwordx4
+; GCN-DAG: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:2 offset1:3
+; GCN-DAG: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset1:1{{$}}
+define void @store_v4i32_as_v8i16_align_4(<8 x i16> addrspace(3)* align 4 %out, <4 x i32> %x) #0 {
+  %x.bc = bitcast <4 x i32> %x to <8 x i16>
+  store <8 x i16> %x.bc, <8 x i16> addrspace(3)* %out, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}store_v2i32_as_i64_align_4:
+; GCN: s_load_dwordx2
+; GCN: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset1:1{{$}}
+define void @store_v2i32_as_i64_align_4(i64 addrspace(3)* align 4 %out, <2 x i32> %x) #0 {
+  %x.bc = bitcast <2 x i32> %x to i64
+  store i64 %x.bc, i64 addrspace(3)* %out, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}store_v4i32_as_v2i64_align_4:
+; GCN: s_load_dwordx4
+; GCN-DAG: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:2 offset1:3
+; GCN-DAG: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset1:1{{$}}
+define void @store_v4i32_as_v2i64_align_4(<2 x i64> addrspace(3)* align 4 %out, <4 x i32> %x) #0 {
+  %x.bc = bitcast <4 x i32> %x to <2 x i64>
+  store <2 x i64> %x.bc, <2 x i64> addrspace(3)* %out, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}store_v4i16_as_v2i32_align_4:
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset1:1{{$}}
+define void @store_v4i16_as_v2i32_align_4(<2 x i32> addrspace(3)* align 4 %out, <4 x i16> %x) #0 {
+  %x.bc = bitcast <4 x i16> %x to <2 x i32>
+  store <2 x i32> %x.bc, <2 x i32> addrspace(3)* %out, align 4
+  ret void
+}
+
+attributes #0 = { nounwind }

Modified: llvm/trunk/test/CodeGen/X86/avx-vextractf128.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-vextractf128.ll?rev=267217&r1=267216&r2=267217&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-vextractf128.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-vextractf128.ll Fri Apr 22 16:01:41 2016
@@ -119,7 +119,7 @@ entry:
 define void @t9(i64* %p) {
 ; CHECK-LABEL: t9:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    vxorps %ymm0, %ymm0, %ymm0
 ; CHECK-NEXT:    vmovups %ymm0, (%rdi)
 ; CHECK-NEXT:    vzeroupper
 ; CHECK-NEXT:    retq
