[llvm] r258785 - AMDGPU: Implement read_register and write_register intrinsics

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 25 20:29:25 PST 2016


Author: arsenm
Date: Mon Jan 25 22:29:24 2016
New Revision: 258785

URL: http://llvm.org/viewvc/llvm-project?rev=258785&view=rev
Log:
AMDGPU: Implement read_register and write_register intrinsics

Some of the special intrinsics that now correspond to an instruction
also have special setting of some registers, e.g. llvm.SI.sendmsg sets
m0 as well as using s_sendmsg. Using these explicit register intrinsics
may be a better option.

Reading the exec mask and others may be useful for debugging. For this,
I'm not sure the current handling is entirely correct because we would
want the read to be convergent, although it's possible this is already
treated sufficiently conservatively.

Added:
    llvm/trunk/test/CodeGen/AMDGPU/read-register-invalid-subtarget.ll
    llvm/trunk/test/CodeGen/AMDGPU/read-register-invalid-type-i32.ll
    llvm/trunk/test/CodeGen/AMDGPU/read-register-invalid-type-i64.ll
    llvm/trunk/test/CodeGen/AMDGPU/read_register.ll
    llvm/trunk/test/CodeGen/AMDGPU/write-register-vgpr-into-sgpr.ll
    llvm/trunk/test/CodeGen/AMDGPU/write_register.ll
Modified:
    llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
    llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h

Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=258785&r1=258784&r2=258785&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Mon Jan 25 22:29:24 2016
@@ -27,6 +27,7 @@
 #include "SIMachineFunctionInfo.h"
 #include "SIRegisterInfo.h"
 #include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/StringSwitch.h"
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -989,6 +990,52 @@ SDValue SITargetLowering::LowerReturn(SD
   return DAG.getNode(AMDGPUISD::RET_FLAG, DL, MVT::Other, RetOps);
 }
 
+unsigned SITargetLowering::getRegisterByName(const char* RegName, EVT VT,
+                                             SelectionDAG &DAG) const {
+  unsigned Reg = StringSwitch<unsigned>(RegName)
+    .Case("m0", AMDGPU::M0)
+    .Case("exec", AMDGPU::EXEC)
+    .Case("exec_lo", AMDGPU::EXEC_LO)
+    .Case("exec_hi", AMDGPU::EXEC_HI)
+    .Case("flat_scratch", AMDGPU::FLAT_SCR)
+    .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
+    .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
+    .Default(AMDGPU::NoRegister);
+
+  if (Reg == AMDGPU::NoRegister) {
+    report_fatal_error(Twine("invalid register name \""
+                             + StringRef(RegName)  + "\"."));
+
+  }
+
+  if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS &&
+      Subtarget->getRegisterInfo()->regsOverlap(Reg, AMDGPU::FLAT_SCR)) {
+    report_fatal_error(Twine("invalid register \""
+                             + StringRef(RegName)  + "\" for subtarget."));
+  }
+
+  switch (Reg) {
+  case AMDGPU::M0:
+  case AMDGPU::EXEC_LO:
+  case AMDGPU::EXEC_HI:
+  case AMDGPU::FLAT_SCR_LO:
+  case AMDGPU::FLAT_SCR_HI:
+    if (VT.getSizeInBits() == 32)
+      return Reg;
+    break;
+  case AMDGPU::EXEC:
+  case AMDGPU::FLAT_SCR:
+    if (VT.getSizeInBits() == 64)
+      return Reg;
+    break;
+  default:
+    llvm_unreachable("missing register type checking");
+  }
+
+  report_fatal_error(Twine("invalid type for register \""
+                           + StringRef(RegName) + "\"."));
+}
+
 MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
     MachineInstr * MI, MachineBasicBlock * BB) const {
 

Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h?rev=258785&r1=258784&r2=258785&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h Mon Jan 25 22:29:24 2016
@@ -104,6 +104,9 @@ public:
                       const SmallVectorImpl<SDValue> &OutVals,
                       SDLoc DL, SelectionDAG &DAG) const override;
 
+  unsigned getRegisterByName(const char* RegName, EVT VT,
+                             SelectionDAG &DAG) const override;
+
   MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr * MI,
                                       MachineBasicBlock * BB) const override;
   bool enableAggressiveFMAFusion(EVT VT) const override;

Added: llvm/trunk/test/CodeGen/AMDGPU/read-register-invalid-subtarget.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/read-register-invalid-subtarget.ll?rev=258785&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/read-register-invalid-subtarget.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/read-register-invalid-subtarget.ll Mon Jan 25 22:29:24 2016
@@ -0,0 +1,14 @@
+; RUN: not llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s 2>&1 | FileCheck %s
+
+; CHECK: invalid register "flat_scratch_lo" for subtarget.
+
+declare i32 @llvm.read_register.i32(metadata) #0
+
+define void @test_invalid_read_flat_scratch_lo(i32 addrspace(1)* %out) nounwind {
+  store volatile i32 0, i32 addrspace(3)* undef
+  %m0 = call i32 @llvm.read_register.i32(metadata !0)
+  store i32 %m0, i32 addrspace(1)* %out
+  ret void
+}
+
+!0 = !{!"flat_scratch_lo"}

Added: llvm/trunk/test/CodeGen/AMDGPU/read-register-invalid-type-i32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/read-register-invalid-type-i32.ll?rev=258785&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/read-register-invalid-type-i32.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/read-register-invalid-type-i32.ll Mon Jan 25 22:29:24 2016
@@ -0,0 +1,14 @@
+; RUN: not llc -march=amdgcn -verify-machineinstrs < %s 2>&1 | FileCheck %s
+
+; CHECK: invalid type for register "exec".
+
+declare i32 @llvm.read_register.i32(metadata) #0
+
+define void @test_invalid_read_exec(i32 addrspace(1)* %out) nounwind {
+  store volatile i32 0, i32 addrspace(3)* undef
+  %m0 = call i32 @llvm.read_register.i32(metadata !0)
+  store i32 %m0, i32 addrspace(1)* %out
+  ret void
+}
+
+!0 = !{!"exec"}

Added: llvm/trunk/test/CodeGen/AMDGPU/read-register-invalid-type-i64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/read-register-invalid-type-i64.ll?rev=258785&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/read-register-invalid-type-i64.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/read-register-invalid-type-i64.ll Mon Jan 25 22:29:24 2016
@@ -0,0 +1,13 @@
+; RUN: not llc -march=amdgcn -verify-machineinstrs < %s 2>&1 | FileCheck %s
+
+; CHECK: invalid type for register "m0".
+
+declare i64 @llvm.read_register.i64(metadata) #0
+
+define void @test_invalid_read_m0(i64 addrspace(1)* %out) #0 {
+  %exec = call i64 @llvm.read_register.i64(metadata !0)
+  store i64 %exec, i64 addrspace(1)* %out
+  ret void
+}
+
+!0 = !{!"m0"}

Added: llvm/trunk/test/CodeGen/AMDGPU/read_register.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/read_register.ll?rev=258785&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/read_register.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/read_register.ll Mon Jan 25 22:29:24 2016
@@ -0,0 +1,81 @@
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck %s
+
+declare i32 @llvm.read_register.i32(metadata) #0
+declare i64 @llvm.read_register.i64(metadata) #0
+
+; CHECK-LABEL: {{^}}test_read_m0:
+; CHECK: s_mov_b32 m0, -1
+; CHECK: v_mov_b32_e32 [[COPY:v[0-9]+]], m0
+; CHECK: buffer_store_dword [[COPY]]
+define void @test_read_m0(i32 addrspace(1)* %out) #0 {
+  store volatile i32 0, i32 addrspace(3)* undef
+  %m0 = call i32 @llvm.read_register.i32(metadata !0)
+  store i32 %m0, i32 addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}test_read_exec:
+; CHECK: v_mov_b32_e32 v[[LO:[0-9]+]], exec_lo
+; CHECK: v_mov_b32_e32 v[[HI:[0-9]+]], exec_hi
+; CHECK: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
+define void @test_read_exec(i64 addrspace(1)* %out) #0 {
+  %exec = call i64 @llvm.read_register.i64(metadata !1)
+  store i64 %exec, i64 addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}test_read_flat_scratch:
+; CHECK: v_mov_b32_e32 v[[LO:[0-9]+]], flat_scratch_lo
+; CHECK: v_mov_b32_e32 v[[HI:[0-9]+]], flat_scratch_hi
+; CHECK: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
+define void @test_read_flat_scratch(i64 addrspace(1)* %out) #0 {
+  %flat_scratch = call i64 @llvm.read_register.i64(metadata !2)
+  store i64 %flat_scratch, i64 addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}test_read_flat_scratch_lo:
+; CHECK: v_mov_b32_e32 [[COPY:v[0-9]+]], flat_scratch_lo
+; CHECK: buffer_store_dword [[COPY]]
+define void @test_read_flat_scratch_lo(i32 addrspace(1)* %out) #0 {
+  %flat_scratch_lo = call i32 @llvm.read_register.i32(metadata !3)
+  store i32 %flat_scratch_lo, i32 addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}test_read_flat_scratch_hi:
+; CHECK: v_mov_b32_e32 [[COPY:v[0-9]+]], flat_scratch_hi
+; CHECK: buffer_store_dword [[COPY]]
+define void @test_read_flat_scratch_hi(i32 addrspace(1)* %out) #0 {
+  %flat_scratch_hi = call i32 @llvm.read_register.i32(metadata !4)
+  store i32 %flat_scratch_hi, i32 addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}test_read_exec_lo:
+; CHECK: v_mov_b32_e32 [[COPY:v[0-9]+]], exec_lo
+; CHECK: buffer_store_dword [[COPY]]
+define void @test_read_exec_lo(i32 addrspace(1)* %out) #0 {
+  %exec_lo = call i32 @llvm.read_register.i32(metadata !5)
+  store i32 %exec_lo, i32 addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}test_read_exec_hi:
+; CHECK: v_mov_b32_e32 [[COPY:v[0-9]+]], exec_hi
+; CHECK: buffer_store_dword [[COPY]]
+define void @test_read_exec_hi(i32 addrspace(1)* %out) #0 {
+  %exec_hi = call i32 @llvm.read_register.i32(metadata !6)
+  store i32 %exec_hi, i32 addrspace(1)* %out
+  ret void
+}
+
+attributes #0 = { nounwind }
+
+!0 = !{!"m0"}
+!1 = !{!"exec"}
+!2 = !{!"flat_scratch"}
+!3 = !{!"flat_scratch_lo"}
+!4 = !{!"flat_scratch_hi"}
+!5 = !{!"exec_lo"}
+!6 = !{!"exec_hi"}

Added: llvm/trunk/test/CodeGen/AMDGPU/write-register-vgpr-into-sgpr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/write-register-vgpr-into-sgpr.ll?rev=258785&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/write-register-vgpr-into-sgpr.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/write-register-vgpr-into-sgpr.ll Mon Jan 25 22:29:24 2016
@@ -0,0 +1,22 @@
+; XFAIL: *
+; REQUIRES: asserts
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s
+
+; write_register doesn't prevent us from illegally trying to write a
+; vgpr value into a scalar register, but I don't think there's much we
+; can do to avoid this.
+
+declare void @llvm.write_register.i32(metadata, i32) #0
+declare i32 @llvm.r600.read.tidig.x() #0
+
+
+define void @write_vgpr_into_sgpr() {
+  %tid = call i32 @llvm.r600.read.tidig.x()
+  call void @llvm.write_register.i32(metadata !0, i32 %tid)
+  ret void
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
+
+!0 = !{!"exec_lo"}

Added: llvm/trunk/test/CodeGen/AMDGPU/write_register.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/write_register.ll?rev=258785&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/write_register.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/write_register.ll Mon Jan 25 22:29:24 2016
@@ -0,0 +1,80 @@
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck %s
+
+declare void @llvm.write_register.i32(metadata, i32) #0
+declare void @llvm.write_register.i64(metadata, i64) #0
+
+; CHECK-LABEL: {{^}}test_write_m0:
+define void @test_write_m0(i32 %val) #0 {
+  call void @llvm.write_register.i32(metadata !0, i32 0)
+  call void @llvm.write_register.i32(metadata !0, i32 -1)
+  call void @llvm.write_register.i32(metadata !0, i32 %val)
+  ret void
+}
+
+; CHECK-LABEL: {{^}}test_write_exec:
+; CHECK: s_mov_b64 exec, 0
+; CHECK: s_mov_b64 exec, -1
+; CHECK: s_mov_b64 exec, s{{\[[0-9]+:[0-9]+\]}}
+define void @test_write_exec(i64 %val) #0 {
+  call void @llvm.write_register.i64(metadata !1, i64 0)
+  call void @llvm.write_register.i64(metadata !1, i64 -1)
+  call void @llvm.write_register.i64(metadata !1, i64 %val)
+  ret void
+}
+
+; CHECK-LABEL: {{^}}test_write_flat_scratch:
+; CHECK: s_mov_b64 flat_scratch, 0
+; CHECK: s_mov_b64 flat_scratch, -1
+; CHECK: s_mov_b64 flat_scratch, s{{\[[0-9]+:[0-9]+\]}}
+define void @test_write_flat_scratch(i64 %val) #0 {
+  call void @llvm.write_register.i64(metadata !2, i64 0)
+  call void @llvm.write_register.i64(metadata !2, i64 -1)
+  call void @llvm.write_register.i64(metadata !2, i64 %val)
+  ret void
+}
+
+; CHECK-LABEL: {{^}}test_write_flat_scratch_lo:
+; CHECK: s_mov_b32 flat_scratch_lo, 0
+; CHECK: s_mov_b32 flat_scratch_lo, s{{[0-9]+}}
+define void @test_write_flat_scratch_lo(i32 %val) #0 {
+  call void @llvm.write_register.i32(metadata !3, i32 0)
+  call void @llvm.write_register.i32(metadata !3, i32 %val)
+  ret void
+}
+
+; CHECK-LABEL: {{^}}test_write_flat_scratch_hi:
+; CHECK: s_mov_b32 flat_scratch_hi, 0
+; CHECK: s_mov_b32 flat_scratch_hi, s{{[0-9]+}}
+define void @test_write_flat_scratch_hi(i32 %val) #0 {
+  call void @llvm.write_register.i32(metadata !4, i32 0)
+  call void @llvm.write_register.i32(metadata !4, i32 %val)
+  ret void
+}
+
+; CHECK-LABEL: {{^}}test_write_exec_lo:
+; CHECK: s_mov_b32 exec_lo, 0
+; CHECK: s_mov_b32 exec_lo, s{{[0-9]+}}
+define void @test_write_exec_lo(i32 %val) #0 {
+  call void @llvm.write_register.i32(metadata !5, i32 0)
+  call void @llvm.write_register.i32(metadata !5, i32 %val)
+  ret void
+}
+
+; CHECK-LABEL: {{^}}test_write_exec_hi:
+; CHECK: s_mov_b32 exec_hi, 0
+; CHECK: s_mov_b32 exec_hi, s{{[0-9]+}}
+define void @test_write_exec_hi(i32 %val) #0 {
+  call void @llvm.write_register.i32(metadata !6, i32 0)
+  call void @llvm.write_register.i32(metadata !6, i32 %val)
+  ret void
+}
+
+attributes #0 = { nounwind }
+
+!0 = !{!"m0"}
+!1 = !{!"exec"}
+!2 = !{!"flat_scratch"}
+!3 = !{!"flat_scratch_lo"}
+!4 = !{!"flat_scratch_hi"}
+!5 = !{!"exec_lo"}
+!6 = !{!"exec_hi"}




More information about the llvm-commits mailing list