[PATCH] D19325: DAGCombine: (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)

Tom Stellard via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 20 07:00:39 PDT 2016


tstellarAMD created this revision.
tstellarAMD added a reviewer: resistor.
tstellarAMD added a subscriber: llvm-commits.

We already have a combine for this pattern when the input to shl
is an add, so we just need to enable the same transformation when the
input is an or.

http://reviews.llvm.org/D19325

Files:
  lib/CodeGen/SelectionDAG/DAGCombiner.cpp
  test/CodeGen/AMDGPU/llvm.amdgcn.ds.bpermute.ll

Index: test/CodeGen/AMDGPU/llvm.amdgcn.ds.bpermute.ll
===================================================================
--- test/CodeGen/AMDGPU/llvm.amdgcn.ds.bpermute.ll
+++ test/CodeGen/AMDGPU/llvm.amdgcn.ds.bpermute.ll
@@ -21,4 +21,26 @@
   ret void
 }
 
+; CHECK-LABEL: {{^}}ds_bpermute_add_shl:
+; CHECK: ds_bpermute_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:4
+; CHECK: s_waitcnt lgkmcnt
+define void @ds_bpermute_add_shl(i32 addrspace(1)* %out, i32 %base_index, i32 %src) nounwind {
+  %index = add i32 %base_index, 1
+  %byte_index = shl i32 %index, 2
+  %bpermute = call i32 @llvm.amdgcn.ds.bpermute(i32 %byte_index, i32 %src) #0
+  store i32 %bpermute, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; CHECK-LABEL: {{^}}ds_bpermute_or_shl:
+; CHECK: ds_bpermute_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:4
+; CHECK: s_waitcnt lgkmcnt
+define void @ds_bpermute_or_shl(i32 addrspace(1)* %out, i32 %base_index, i32 %src) nounwind {
+  %index = or i32 %base_index, 1
+  %byte_index = shl i32 %index, 2
+  %bpermute = call i32 @llvm.amdgcn.ds.bpermute(i32 %byte_index, i32 %src) #0
+  store i32 %bpermute, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
 attributes #0 = { nounwind readnone convergent }
Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4539,10 +4539,12 @@
   }
 
   // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
+  // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
   // Variant of version done on multiply, except mul by a power of 2 is turned
   // into a shift.
   APInt Val;
-  if (N1C && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() &&
+  if (N1C && (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
+      N0.getNode()->hasOneUse() &&
       (isa<ConstantSDNode>(N0.getOperand(1)) ||
        isConstantSplatVector(N0.getOperand(1).getNode(), Val))) {
     SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D19325.54364.patch
Type: text/x-patch
Size: 2086 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20160420/392ff1dc/attachment.bin>


More information about the llvm-commits mailing list