[llvm] r364815 - AMDGPU/GFX10: implement ds_ordered_count changes
Nicolai Haehnle via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 1 10:17:52 PDT 2019
Author: nha
Date: Mon Jul 1 10:17:52 2019
New Revision: 364815
URL: http://llvm.org/viewvc/llvm-project?rev=364815&view=rev
Log:
AMDGPU/GFX10: implement ds_ordered_count changes
Summary:
ds_ordered_count can now simultaneously operate on up to 4 dwords
in a single instruction, which are taken from (and returned to)
lanes 0..3 of a single VGPR.
Change-Id: I19b6e7b0732b617c10a779a7f9c0303eec7dd276
Reviewers: mareko, arsenm, rampitec
Subscribers: kzhuravl, jvesely, wdng, yaxunl, dstuttard, tpr, t-tye, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D63716
Added:
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.gfx10.ll
Modified:
llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td
llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
Modified: llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td?rev=364815&r1=364814&r2=364815&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td (original)
+++ llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td Mon Jul 1 10:17:52 2019
@@ -406,6 +406,7 @@ class AMDGPUDSOrderedIntrinsic : Intrins
llvm_i32_ty, // scope
llvm_i1_ty, // isVolatile
llvm_i32_ty, // ordered count index (OA index), also added to the address
+ // gfx10: bits 24-27 indicate the number of active threads/dwords
llvm_i1_ty, // wave release, usually set to 1
llvm_i1_ty], // wave done, set to 1 for the last ordered instruction
[NoCapture<0>,
Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=364815&r1=364814&r2=364815&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Mon Jul 1 10:17:52 2019
@@ -5891,12 +5891,29 @@ SDValue SITargetLowering::LowerINTRINSIC
SDValue Chain = M->getOperand(0);
SDValue M0 = M->getOperand(2);
SDValue Value = M->getOperand(3);
- unsigned OrderedCountIndex = M->getConstantOperandVal(7);
+ unsigned IndexOperand = M->getConstantOperandVal(7);
unsigned WaveRelease = M->getConstantOperandVal(8);
unsigned WaveDone = M->getConstantOperandVal(9);
unsigned ShaderType;
unsigned Instruction;
+ unsigned OrderedCountIndex = IndexOperand & 0x3f;
+ IndexOperand &= ~0x3f;
+ unsigned CountDw = 0;
+
+ if (Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10) {
+ CountDw = (IndexOperand >> 24) & 0xf;
+ IndexOperand &= ~(0xf << 24);
+
+ if (CountDw < 1 || CountDw > 4) {
+ report_fatal_error(
+ "ds_ordered_count: dword count must be between 1 and 4");
+ }
+ }
+
+ if (IndexOperand)
+ report_fatal_error("ds_ordered_count: bad index operand");
+
switch (IntrID) {
case Intrinsic::amdgcn_ds_ordered_add:
Instruction = 0;
@@ -5930,6 +5947,10 @@ SDValue SITargetLowering::LowerINTRINSIC
unsigned Offset0 = OrderedCountIndex << 2;
unsigned Offset1 = WaveRelease | (WaveDone << 1) | (ShaderType << 2) |
(Instruction << 4);
+
+ if (Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10)
+ Offset1 |= (CountDw - 1) << 6;
+
unsigned Offset = Offset0 | (Offset1 << 8);
SDValue Ops[] = {
Added: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.gfx10.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.gfx10.ll?rev=364815&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.gfx10.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.gfx10.ll Mon Jul 1 10:17:52 2019
@@ -0,0 +1,23 @@
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=GCN %s
+
+; FUNC-LABEL: {{^}}ds_ordered_add:
+; GCN-DAG: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
+; GCN-DAG: s_mov_b32 m0,
+; GCN: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:772 gds
+define amdgpu_kernel void @ds_ordered_add(i32 addrspace(2)* inreg %gds, i32 addrspace(1)* %out) {
+ %val = call i32 at llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 16777217, i1 true, i1 true)
+ store i32 %val, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}ds_ordered_add_4dw:
+; GCN-DAG: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
+; GCN-DAG: s_mov_b32 m0,
+; GCN: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:49924 gds
+define amdgpu_kernel void @ds_ordered_add_4dw(i32 addrspace(2)* inreg %gds, i32 addrspace(1)* %out) {
+ %val = call i32 at llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 67108865, i1 true, i1 true)
+ store i32 %val, i32 addrspace(1)* %out
+ ret void
+}
+
+declare i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* nocapture, i32, i32, i32, i1, i32, i1, i1)
More information about the llvm-commits
mailing list