[PATCH] D17594: AMDGPU: Eliminate half of i64 or if one operand is zero_extend from i32

Wed Feb 24 23:01:10 PST 2016

arsenm created this revision.
arsenm added a reviewer: tstellarAMD.
arsenm added a subscriber: llvm-commits.
Herald added a subscriber: arsenm.

This helps clean up some of the mess when expanding unaligned 64-bit
loads when changed to be promote to v2i32, and fixes situations
where or x, 0 was emitted after splitting 64-bit ors during moveToVALU.
    
I think this could be a generic combine but I'm not sure.

http://reviews.llvm.org/D17594

Files:
  lib/Target/AMDGPU/SIISelLowering.cpp
  test/CodeGen/AMDGPU/zext-i64-bit-operand.ll

Index: test/CodeGen/AMDGPU/zext-i64-bit-operand.ll
===================================================================

--- /dev/null
+++ test/CodeGen/AMDGPU/zext-i64-bit-operand.ll
@@ -0,0 +1,41 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+; GCN-LABEL: {{^}}zext_or_operand_i64:
+; GCN: buffer_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
+; GCN: buffer_load_dword v[[LD32:[0-9]+]]
+; GCN-NOT: or
+; GCN-NOT: v[[HI]]
+; GCN-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0
+; GCN: v_or_b32_e32 v[[LO]], v[[LD32]], v[[LO]]
+; GCN-NOT: or
+; GCN-NOT: v[[HI]]
+; GCN-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0
+; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
+define void @zext_or_operand_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in0, i32 addrspace(1)* %in1) {
+  %ld.64 = load volatile i64, i64 addrspace(1)* %in0
+  %ld.32 = load volatile i32, i32 addrspace(1)* %in1
+  %ext = zext i32 %ld.32 to i64
+  %or = or i64 %ld.64, %ext
+  store i64 %or, i64 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}zext_or_operand_commute_i64:
+; GCN: buffer_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
+; GCN: buffer_load_dword v[[LD32:[0-9]+]]
+; GCN-NOT: or
+; GCN-NOT: v[[HI]]
+; GCN-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0
+; GCN: v_or_b32_e32 v[[LO]], v[[LD32]], v[[LO]]
+; GCN-NOT: v[[HI]]
+; GCN-NOT: or
+; GCN-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0
+; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
+define void @zext_or_operand_commute_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in0, i32 addrspace(1)* %in1) {
+  %ld.64 = load volatile i64, i64 addrspace(1)* %in0
+  %ld.32 = load volatile i32, i32 addrspace(1)* %in1
+  %ext = zext i32 %ld.32 to i64
+  %or = or i64 %ext, %ld.64
+  store i64 %or, i64 addrspace(1)* %out
+  ret void
+}
Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -2224,6 +2224,33 @@
   SDValue LHS = N->getOperand(0);
   SDValue RHS = N->getOperand(1);
 
+  EVT VT = N->getValueType(0);
+  if (VT == MVT::i64) {
+    // (or i64:x, (zero_extend i32:y)) ->
+    //   i64 (bitcast (v2i32 build_vector (or i32:y, lo_32(x)), hi_32(x)))
+    if (LHS.getOpcode() == ISD::ZERO_EXTEND &&
+        RHS.getOpcode() != ISD::ZERO_EXTEND)
+      std::swap(LHS, RHS);
+
+    if (RHS.getOpcode() == ISD::ZERO_EXTEND) {
+      SDValue ExtSrc = RHS.getOperand(0);
+      EVT SrcVT = ExtSrc.getValueType();
+      if (SrcVT == MVT::i32) {
+        SDLoc SL(N);
+        SDValue LowLHS, HiBits;
+        std::tie(LowLHS, HiBits) = split64BitValue(LHS, DAG);
+        SDValue LowOr = DAG.getNode(ISD::OR, SL, MVT::i32, LowLHS, ExtSrc);
+
+        DCI.AddToWorklist(LowOr.getNode());
+        DCI.AddToWorklist(HiBits.getNode());
+
+        SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32,
+                                  LowOr, HiBits);
+        return DAG.getNode(ISD::BITCAST, SL, MVT::i64, Vec);
+      }
+    }
+  }
+
   // or (fp_class x, c1), (fp_class x, c2) -> fp_class x, (c1 | c2)
   if (LHS.getOpcode() == AMDGPUISD::FP_CLASS &&
       RHS.getOpcode() == AMDGPUISD::FP_CLASS) {


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D17594.49010.patch
Type: text/x-patch
Size: 3207 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20160225/39c5fc67/attachment.bin>