[llvm] r219823 - R600/SI: Also try to use 0 base for misaligned 8-byte DS loads.

Matt Arsenault Matthew.Arsenault at amd.com
Wed Oct 15 11:06:43 PDT 2014


Author: arsenm
Date: Wed Oct 15 13:06:43 2014
New Revision: 219823

URL: http://llvm.org/viewvc/llvm-project?rev=219823&view=rev
Log:
R600/SI: Also try to use 0 base for misaligned 8-byte DS loads.

Modified:
    llvm/trunk/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
    llvm/trunk/test/CodeGen/R600/ds_read2.ll
    llvm/trunk/test/CodeGen/R600/ds_write2.ll

Modified: llvm/trunk/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/AMDGPUISelDAGToDAG.cpp?rev=219823&r1=219822&r2=219823&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/AMDGPUISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/R600/AMDGPUISelDAGToDAG.cpp Wed Oct 15 13:06:43 2014
@@ -823,6 +823,23 @@ bool AMDGPUDAGToDAGISel::SelectDS64Bit4B
     }
   }
 
+  if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
+    unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
+    unsigned DWordOffset1 = DWordOffset0 + 1;
+    assert(4 * DWordOffset0 == CAddr->getZExtValue());
+
+    if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
+      SDValue Zero = CurDAG->getTargetConstant(0, MVT::i32);
+      MachineSDNode *MovZero
+        = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
+                                 SDLoc(Addr), MVT::i32, Zero);
+      Base = SDValue(MovZero, 0);
+      Offset0 = CurDAG->getTargetConstant(DWordOffset0, MVT::i8);
+      Offset1 = CurDAG->getTargetConstant(DWordOffset1, MVT::i8);
+      return true;
+    }
+  }
+
   // default case
   Base = Addr;
   Offset0 = CurDAG->getTargetConstant(0, MVT::i8);

Modified: llvm/trunk/test/CodeGen/R600/ds_read2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/ds_read2.ll?rev=219823&r1=219822&r2=219823&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/R600/ds_read2.ll (original)
+++ llvm/trunk/test/CodeGen/R600/ds_read2.ll Wed Oct 15 13:06:43 2014
@@ -406,6 +406,36 @@ define void @load_constant_disjoint_offs
   ret void
 }
 
+ at bar = addrspace(3) global [4 x i64] zeroinitializer, align 4
+
+; SI-LABEL: @load_misaligned64_constant_offsets
+; SI: V_MOV_B32_e32 [[ZERO:v[0-9]+]], 0{{$}}
+; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:0 offset1:1
+; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:2 offset1:3
+define void @load_misaligned64_constant_offsets(i64 addrspace(1)* %out) {
+  %val0 = load i64 addrspace(3)* getelementptr inbounds ([4 x i64] addrspace(3)* @bar, i32 0, i32 0), align 4
+  %val1 = load i64 addrspace(3)* getelementptr inbounds ([4 x i64] addrspace(3)* @bar, i32 0, i32 1), align 4
+  %sum = add i64 %val0, %val1
+  store i64 %sum, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+ at bar.large = addrspace(3) global [4096 x i64] zeroinitializer, align 4
+
+; SI-LABEL: @load_misaligned64_constant_large_offsets
+; SI-DAG: V_MOV_B32_e32 [[BASE0:v[0-9]+]], 0x7ff8{{$}}
+; SI-DAG: V_MOV_B32_e32 [[BASE1:v[0-9]+]], 0x4000
+; SI-DAG: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[BASE0]] offset0:0 offset1:1
+; SI-DAG: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[BASE1]] offset0:0 offset1:1
+; SI: S_ENDPGM
+define void @load_misaligned64_constant_large_offsets(i64 addrspace(1)* %out) {
+  %val0 = load i64 addrspace(3)* getelementptr inbounds ([4096 x i64] addrspace(3)* @bar.large, i32 0, i32 2048), align 4
+  %val1 = load i64 addrspace(3)* getelementptr inbounds ([4096 x i64] addrspace(3)* @bar.large, i32 0, i32 4095), align 4
+  %sum = add i64 %val0, %val1
+  store i64 %sum, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
 @sgemm.lA = internal unnamed_addr addrspace(3) global [264 x float] zeroinitializer, align 4
 @sgemm.lB = internal unnamed_addr addrspace(3) global [776 x float] zeroinitializer, align 4
 

Modified: llvm/trunk/test/CodeGen/R600/ds_write2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/ds_write2.ll?rev=219823&r1=219822&r2=219823&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/R600/ds_write2.ll (original)
+++ llvm/trunk/test/CodeGen/R600/ds_write2.ll Wed Oct 15 13:06:43 2014
@@ -341,6 +341,32 @@ define void @store_constant_disjoint_off
   ret void
 }
 
+ at bar = addrspace(3) global [4 x i64] zeroinitializer, align 4
+
+; SI-LABEL: @store_misaligned64_constant_offsets
+; SI: V_MOV_B32_e32 [[ZERO:v[0-9]+]], 0{{$}}
+; SI: DS_WRITE2_B32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
+; SI: DS_WRITE2_B32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:2 offset1:3
+define void @store_misaligned64_constant_offsets() {
+  store i64 123, i64 addrspace(3)* getelementptr inbounds ([4 x i64] addrspace(3)* @bar, i32 0, i32 0), align 4
+  store i64 123, i64 addrspace(3)* getelementptr inbounds ([4 x i64] addrspace(3)* @bar, i32 0, i32 1), align 4
+  ret void
+}
+
+ at bar.large = addrspace(3) global [4096 x i64] zeroinitializer, align 4
+
+; SI-LABEL: @store_misaligned64_constant_large_offsets
+; SI-DAG: V_MOV_B32_e32 [[BASE0:v[0-9]+]], 0x7ff8{{$}}
+; SI-DAG: V_MOV_B32_e32 [[BASE1:v[0-9]+]], 0x4000{{$}}
+; SI-DAG: DS_WRITE2_B32 [[BASE0]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
+; SI-DAG: DS_WRITE2_B32 [[BASE1]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
+; SI: S_ENDPGM
+define void @store_misaligned64_constant_large_offsets() {
+  store i64 123, i64 addrspace(3)* getelementptr inbounds ([4096 x i64] addrspace(3)* @bar.large, i32 0, i32 2048), align 4
+  store i64 123, i64 addrspace(3)* getelementptr inbounds ([4096 x i64] addrspace(3)* @bar.large, i32 0, i32 4095), align 4
+  ret void
+}
+
 @sgemm.lA = internal unnamed_addr addrspace(3) global [264 x float] zeroinitializer, align 4
 @sgemm.lB = internal unnamed_addr addrspace(3) global [776 x float] zeroinitializer, align 4
 





More information about the llvm-commits mailing list