[llvm] r248169 - DAGCombiner: Replace store of FP constant after attempting store merges

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 21 08:59:46 PDT 2015


Author: arsenm
Date: Mon Sep 21 10:59:46 2015
New Revision: 248169

URL: http://llvm.org/viewvc/llvm-project?rev=248169&view=rev
Log:
DAGCombiner: Replace store of FP constant after attempting store merges

When storing multiple FP constants, some subset of the stores
would be replaced with integer stores due to visit order, so
MergeConsecutiveStores would only partially merge them.
Doing the FP constant replacement after the merge attempt
avoids this.

Added:
    llvm/trunk/test/CodeGen/PowerPC/vector-merge-store-fp-constants.ll
    llvm/trunk/test/CodeGen/X86/vector-merge-store-fp-constants.ll
Modified:
    llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/trunk/test/CodeGen/AMDGPU/merge-stores.ll

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=248169&r1=248168&r2=248169&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Mon Sep 21 10:59:46 2015
@@ -11485,16 +11485,6 @@ SDValue DAGCombiner::visitSTORE(SDNode *
   if (Value.getOpcode() == ISD::UNDEF && ST->isUnindexed())
     return Chain;
 
-  // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
-  //
-  // Make sure to do this only after attempting to merge stores in order to
-  //  avoid changing the types of some subset of stores due to visit order,
-  //  preventing their merging.
-  if (isa<ConstantFPSDNode>(Value)) {
-    if (SDValue NewSt = replaceStoreOfFPConstant(ST))
-      return NewSt;
-  }
-
   // Try to infer better alignment information than the store already has.
   if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
     if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
@@ -11618,6 +11608,16 @@ SDValue DAGCombiner::visitSTORE(SDNode *
       return SDValue(N, 0);
   }
 
+  // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
+  //
+  // Make sure to do this only after attempting to merge stores in order to
+  //  avoid changing the types of some subset of stores due to visit order,
+  //  preventing their merging.
+  if (isa<ConstantFPSDNode>(Value)) {
+    if (SDValue NewSt = replaceStoreOfFPConstant(ST))
+      return NewSt;
+  }
+
   return ReduceLoadOpStoreWidth(N);
 }
 

Modified: llvm/trunk/test/CodeGen/AMDGPU/merge-stores.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/merge-stores.ll?rev=248169&r1=248168&r2=248169&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/merge-stores.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/merge-stores.ll Mon Sep 21 10:59:46 2015
@@ -121,10 +121,7 @@ define void @merge_global_store_4_consta
 }
 
 ; GCN-LABEL: {{^}}merge_global_store_4_constants_f32_order:
-; XGCN: buffer_store_dwordx4
-; GCN: buffer_store_dword v
-; GCN: buffer_store_dword v
-; GCN: buffer_store_dwordx2 v
+; GCN: buffer_store_dwordx4
 define void @merge_global_store_4_constants_f32_order(float addrspace(1)* %out) #0 {
   %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1
   %out.gep.2 = getelementptr float, float addrspace(1)* %out, i32 2
@@ -137,17 +134,9 @@ define void @merge_global_store_4_consta
   ret void
 }
 
-; First store is out of order. Because of order of combines, the
-; consecutive store fails because only some of the stores have been
-; replaced with integer constant stores, and then won't merge because
-; the types are different.
-
+; First store is out of order.
 ; GCN-LABEL: {{^}}merge_global_store_4_constants_f32:
-; XGCN: buffer_store_dwordx4
-; GCN: buffer_store_dword v
-; GCN: buffer_store_dword v
-; GCN: buffer_store_dword v
-; GCN: buffer_store_dword v
+; GCN: buffer_store_dwordx4
 define void @merge_global_store_4_constants_f32(float addrspace(1)* %out) #0 {
   %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1
   %out.gep.2 = getelementptr float, float addrspace(1)* %out, i32 2
@@ -159,6 +148,29 @@ define void @merge_global_store_4_consta
   store float 8.0, float addrspace(1)* %out
   ret void
 }
+
+; FIXME: Should be able to merge this
+; GCN-LABEL: {{^}}merge_global_store_4_constants_mixed_i32_f32:
+; XGCN: buffer_store_dwordx4
+; GCN: buffer_store_dword
+; GCN: buffer_store_dword
+; GCN: buffer_store_dword
+; GCN: buffer_store_dword
+; GCN: s_endpgm
+define void @merge_global_store_4_constants_mixed_i32_f32(float addrspace(1)* %out) #0 {
+  %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1
+  %out.gep.2 = getelementptr float, float addrspace(1)* %out, i32 2
+  %out.gep.3 = getelementptr float, float addrspace(1)* %out, i32 3
+
+  %out.gep.1.bc = bitcast float addrspace(1)* %out.gep.1 to i32 addrspace(1)*
+  %out.gep.3.bc = bitcast float addrspace(1)* %out.gep.3 to i32 addrspace(1)*
+
+  store i32 11, i32 addrspace(1)* %out.gep.1.bc
+  store float 2.0, float addrspace(1)* %out.gep.2
+  store i32 17, i32 addrspace(1)* %out.gep.3.bc
+  store float 8.0, float addrspace(1)* %out
+  ret void
+}
 
 ; GCN-LABEL: {{^}}merge_global_store_3_constants_i32:
 ; SI-DAG: buffer_store_dwordx2

Added: llvm/trunk/test/CodeGen/PowerPC/vector-merge-store-fp-constants.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/vector-merge-store-fp-constants.ll?rev=248169&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/vector-merge-store-fp-constants.ll (added)
+++ llvm/trunk/test/CodeGen/PowerPC/vector-merge-store-fp-constants.ll Mon Sep 21 10:59:46 2015
@@ -0,0 +1,28 @@
+; RUN: llc -march=ppc64 -mtriple=ppc64-apple-darwin < %s | FileCheck %s
+
+; CHECK-LABEL: {{^}}_merge_8_float_zero_stores:
+; CHECK: li [[ZEROREG:r[0-9]+]], 0
+; CHECK-DAG: std [[ZEROREG]], 0([[PTR:r[0-9]+]])
+; CHECK-DAG: std [[ZEROREG]], 8([[PTR]])
+; CHECK-DAG: std [[ZEROREG]], 16([[PTR]])
+; CHECK-DAG: std [[ZEROREG]], 24([[PTR]])
+; CHECK: blr
+define void @merge_8_float_zero_stores(float* %ptr) {
+  %idx0 = getelementptr float, float* %ptr, i64 0
+  %idx1 = getelementptr float, float* %ptr, i64 1
+  %idx2 = getelementptr float, float* %ptr, i64 2
+  %idx3 = getelementptr float, float* %ptr, i64 3
+  %idx4 = getelementptr float, float* %ptr, i64 4
+  %idx5 = getelementptr float, float* %ptr, i64 5
+  %idx6 = getelementptr float, float* %ptr, i64 6
+  %idx7 = getelementptr float, float* %ptr, i64 7
+  store float 0.0, float* %idx0, align 4
+  store float 0.0, float* %idx1, align 4
+  store float 0.0, float* %idx2, align 4
+  store float 0.0, float* %idx3, align 4
+  store float 0.0, float* %idx4, align 4
+  store float 0.0, float* %idx5, align 4
+  store float 0.0, float* %idx6, align 4
+  store float 0.0, float* %idx7, align 4
+  ret void
+}

Added: llvm/trunk/test/CodeGen/X86/vector-merge-store-fp-constants.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-merge-store-fp-constants.ll?rev=248169&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-merge-store-fp-constants.ll (added)
+++ llvm/trunk/test/CodeGen/X86/vector-merge-store-fp-constants.ll Mon Sep 21 10:59:46 2015
@@ -0,0 +1,35 @@
+; RUN: llc -march=x86-64 -mtriple=x86_64-unknown-unknown < %s | FileCheck -check-prefix=DEFAULTCPU -check-prefix=ALL %s
+; RUN: llc -march=x86-64 -mcpu=x86-64 -mtriple=x86_64-unknown-unknown < %s | FileCheck -check-prefix=X8664CPU -check-prefix=ALL %s
+
+
+; ALL-LABEL: {{^}}merge_8_float_zero_stores:
+
+; DEFAULTCPU-DAG: movq $0, ([[PTR:%[a-z]+]])
+; DEFAULTCPU-DAG: movq $0, 8([[PTR]])
+; DEFAULTCPU-DAG: movq $0, 16([[PTR]])
+; DEFAULTCPU-DAG: movq $0, 24([[PTR]])
+
+; X8664CPU: xorps [[ZEROREG:%xmm[0-9]+]], [[ZEROREG]]
+; X8664CPU-DAG: movups [[ZEROREG]], ([[PTR:%[a-z]+]])
+; X8664CPU-DAG: movups [[ZEROREG]], 16([[PTR:%[a-z]+]])
+
+; ALL: retq
+define void @merge_8_float_zero_stores(float* %ptr) {
+  %idx0 = getelementptr float, float* %ptr, i64 0
+  %idx1 = getelementptr float, float* %ptr, i64 1
+  %idx2 = getelementptr float, float* %ptr, i64 2
+  %idx3 = getelementptr float, float* %ptr, i64 3
+  %idx4 = getelementptr float, float* %ptr, i64 4
+  %idx5 = getelementptr float, float* %ptr, i64 5
+  %idx6 = getelementptr float, float* %ptr, i64 6
+  %idx7 = getelementptr float, float* %ptr, i64 7
+  store float 0.0, float* %idx0, align 4
+  store float 0.0, float* %idx1, align 4
+  store float 0.0, float* %idx2, align 4
+  store float 0.0, float* %idx3, align 4
+  store float 0.0, float* %idx4, align 4
+  store float 0.0, float* %idx5, align 4
+  store float 0.0, float* %idx6, align 4
+  store float 0.0, float* %idx7, align 4
+  ret void
+}




More information about the llvm-commits mailing list