[PATCH] D39685: AMDGPU: Handle or in multi-use shl ptr combine

Matt Arsenault via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 14 14:55:38 PST 2017


arsenm updated this revision to Diff 122921.
arsenm added a comment.

Fix condition


https://reviews.llvm.org/D39685

Files:
  lib/Target/AMDGPU/SIISelLowering.cpp
  test/CodeGen/AMDGPU/shl_add_ptr.ll


Index: test/CodeGen/AMDGPU/shl_add_ptr.ll
===================================================================
--- test/CodeGen/AMDGPU/shl_add_ptr.ll
+++ test/CodeGen/AMDGPU/shl_add_ptr.ll
@@ -384,5 +384,39 @@
   ret void
 }
 
+; GCN-LABEL: {{^}}shl_or_ptr_combine_2use_lds:
+; GCN: v_lshlrev_b32_e32 [[SCALE0:v[0-9]+]], 3, v0
+; GCN: ds_write_b32 [[SCALE0]], v{{[0-9]+}} offset:32
+
+; GCN: v_lshlrev_b32_e32 [[SCALE1:v[0-9]+]], 4, v0
+; GCN: ds_write_b32 [[SCALE1]], v{{[0-9]+}} offset:64
+define void @shl_or_ptr_combine_2use_lds(i32 %idx) #0 {
+  %idx.add = or i32 %idx, 4
+  %shl0 = shl i32 %idx.add, 3
+  %shl1 = shl i32 %idx.add, 4
+  %ptr0 = inttoptr i32 %shl0 to i32 addrspace(3)*
+  %ptr1 = inttoptr i32 %shl1 to i32 addrspace(3)*
+  store volatile i32 9, i32 addrspace(3)* %ptr0
+  store volatile i32 10, i32 addrspace(3)* %ptr1
+  ret void
+}
+
+; GCN-LABEL: {{^}}shl_or_ptr_combine_2use_max_lds_offset:
+; GCN-DAG: v_lshlrev_b32_e32 [[SCALE0:v[0-9]+]], 3, v0
+; GCN-DAG: v_lshlrev_b32_e32 [[SCALE1:v[0-9]+]], 4, v0
+; GCN-DAG: ds_write_b32 [[SCALE0]], v{{[0-9]+}} offset:65528
+; GCN-DAG: v_or_b32_e32 [[ADD1:v[0-9]+]], 0x1fff0, [[SCALE1]]
+; GCN: ds_write_b32 [[ADD1]], v{{[0-9]+$}}
+define void @shl_or_ptr_combine_2use_max_lds_offset(i32 %idx) #0 {
+  %idx.add = or i32 %idx, 8191
+  %shl0 = shl i32 %idx.add, 3
+  %shl1 = shl i32 %idx.add, 4
+  %ptr0 = inttoptr i32 %shl0 to i32 addrspace(3)*
+  %ptr1 = inttoptr i32 %shl1 to i32 addrspace(3)*
+  store volatile i32 9, i32 addrspace(3)* %ptr0
+  store volatile i32 10, i32 addrspace(3)* %ptr1
+  ret void
+}
+
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -5215,8 +5215,8 @@
 
   // We only do this to handle cases where it's profitable when there are
   // multiple uses of the add, so defer to the standard combine.
-  // TODO: Support or
-  if (N0.getOpcode() != ISD::ADD || N0->hasOneUse())
+  if ((N0.getOpcode() != ISD::ADD && N0.getOpcode() != ISD::OR) ||
+      N0->hasOneUse())
     return SDValue();
 
   const ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N1);


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D39685.122921.patch
Type: text/x-patch
Size: 2259 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20171114/ef7deeb0/attachment.bin>


More information about the llvm-commits mailing list