[PATCH] D63716: AMDGPU/GFX10: implement ds_ordered_count changes

Mon Jul 1 10:18:36 PDT 2019

This revision was automatically updated to reflect the committed changes.
Closed by commit rL364815: AMDGPU/GFX10: implement ds_ordered_count changes (authored by nha).

Repository:
  rL LLVM

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D63716/new/

https://reviews.llvm.org/D63716

Files:
  llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td
  llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
  llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.gfx10.ll


Index: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================

--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -5891,12 +5891,29 @@
     SDValue Chain = M->getOperand(0);
     SDValue M0 = M->getOperand(2);
     SDValue Value = M->getOperand(3);
-    unsigned OrderedCountIndex = M->getConstantOperandVal(7);
+    unsigned IndexOperand = M->getConstantOperandVal(7);
     unsigned WaveRelease = M->getConstantOperandVal(8);
     unsigned WaveDone = M->getConstantOperandVal(9);
     unsigned ShaderType;
     unsigned Instruction;
 
+    unsigned OrderedCountIndex = IndexOperand & 0x3f;
+    IndexOperand &= ~0x3f;
+    unsigned CountDw = 0;
+
+    if (Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10) {
+      CountDw = (IndexOperand >> 24) & 0xf;
+      IndexOperand &= ~(0xf << 24);
+
+      if (CountDw < 1 || CountDw > 4) {
+        report_fatal_error(
+            "ds_ordered_count: dword count must be between 1 and 4");
+      }
+    }
+
+    if (IndexOperand)
+      report_fatal_error("ds_ordered_count: bad index operand");
+
     switch (IntrID) {
     case Intrinsic::amdgcn_ds_ordered_add:
       Instruction = 0;
@@ -5930,6 +5947,10 @@
     unsigned Offset0 = OrderedCountIndex << 2;
     unsigned Offset1 = WaveRelease | (WaveDone << 1) | (ShaderType << 2) |
                        (Instruction << 4);
+
+    if (Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10)
+      Offset1 |= (CountDw - 1) << 6;
+
     unsigned Offset = Offset0 | (Offset1 << 8);
 
     SDValue Ops[] = {
Index: llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td
===================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td
+++ llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -406,6 +406,7 @@
    llvm_i32_ty, // scope
    llvm_i1_ty,  // isVolatile
    llvm_i32_ty, // ordered count index (OA index), also added to the address
+                // gfx10: bits 24-27 indicate the number of active threads/dwords
    llvm_i1_ty,  // wave release, usually set to 1
    llvm_i1_ty], // wave done, set to 1 for the last ordered instruction
   [NoCapture<0>,
Index: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.gfx10.ll
===================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.gfx10.ll
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.gfx10.ll
@@ -0,0 +1,23 @@
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=GCN %s
+
+; FUNC-LABEL: {{^}}ds_ordered_add:
+; GCN-DAG: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
+; GCN-DAG: s_mov_b32 m0,
+; GCN: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:772 gds
+define amdgpu_kernel void @ds_ordered_add(i32 addrspace(2)* inreg %gds, i32 addrspace(1)* %out) {
+  %val = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 16777217, i1 true, i1 true)
+  store i32 %val, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}ds_ordered_add_4dw:
+; GCN-DAG: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
+; GCN-DAG: s_mov_b32 m0,
+; GCN: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:49924 gds
+define amdgpu_kernel void @ds_ordered_add_4dw(i32 addrspace(2)* inreg %gds, i32 addrspace(1)* %out) {
+  %val = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 67108865, i1 true, i1 true)
+  store i32 %val, i32 addrspace(1)* %out
+  ret void
+}
+
+declare i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* nocapture, i32, i32, i32, i1, i32, i1, i1)


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D63716.207370.patch
Type: text/x-patch
Size: 3691 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20190701/31648a1f/attachment-0001.bin>