[llvm] r305203 - AMDGPU: Teach isLegalAddressingMode about flat offsets

Mon Jun 12 10:06:35 PDT 2017

Author: arsenm
Date: Mon Jun 12 12:06:35 2017
New Revision: 305203

URL: http://llvm.org/viewvc/llvm-project?rev=305203&view=rev
Log:
AMDGPU: Teach isLegalAddressingMode about flat offsets

Also fix reporting r+r as a valid addressing mode without
offsets.

Modified:
    llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
    llvm/trunk/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll

Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=305203&r1=305202&r2=305203&view=diff
==============================================================================

--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Mon Jun 12 12:06:35 2017
@@ -567,9 +567,17 @@ bool SITargetLowering::getAddrModeArgume
 }
 
 bool SITargetLowering::isLegalFlatAddressingMode(const AddrMode &AM) const {
-  // Flat instructions do not have offsets, and only have the register
-  // address.
-  return AM.BaseOffs == 0 && (AM.Scale == 0 || AM.Scale == 1);
+  if (!Subtarget->hasFlatInstOffsets()) {
+    // Flat instructions do not have offsets, and only have the register
+    // address.
+    return AM.BaseOffs == 0 && AM.Scale == 0;
+  }
+
+  // GFX9 added a 13-bit signed offset. When using regular flat instructions,
+  // the sign bit is ignored and is treated as a 12-bit unsigned offset.
+
+  // Just r + i
+  return isUInt<12>(AM.BaseOffs) && AM.Scale == 0;
 }
 
 bool SITargetLowering::isLegalMUBUFAddressingMode(const AddrMode &AM) const {

Modified: llvm/trunk/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll?rev=305203&r1=305202&r2=305203&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll Mon Jun 12 12:06:35 2017
@@ -1,12 +1,19 @@
-; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=bonaire < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-CI %s
-; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-VI %s
-; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
+; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=bonaire < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-CI -check-prefix=OPT-CIVI %s
+; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-VI -check-prefix=OPT-CIVI %s
+; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=gfx900 -mattr=-flat-for-global < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-GFX9 %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=CIVI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=CIVI %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
 
 ; OPT-LABEL: @test_no_sink_flat_small_offset_i32(
-; OPT: getelementptr i32, i32 addrspace(4)* %in
-; OPT: br i1
-; OPT-NOT: ptrtoint
+; OPT-CIVI: getelementptr i32, i32 addrspace(4)* %in
+; OPT-CIVI: br i1
+; OPT-CIVI-NOT: ptrtoint
+
+; OPT-GFX9: br
+; OPT-GFX9: %sunkaddr = getelementptr i8, i8 addrspace(4)* %0, i64 28
+; OPT-GFX9: %1 = bitcast i8 addrspace(4)* %sunkaddr to i32 addrspace(4)*
+; OPT-GFX9: load i32, i32 addrspace(4)* %1
 
 ; GCN-LABEL: {{^}}test_no_sink_flat_small_offset_i32:
 ; GCN: flat_load_dword
@@ -96,3 +103,105 @@ endif:
 done:
   ret void
 }
+
+; OPT-LABEL: @test_sink_flat_small_max_flat_offset(
+; OPT-CIVI: %in.gep = getelementptr i8, i8 addrspace(4)* %in, i64 4095
+; OPT-CIVI: br
+; OPT-CIVI-NOT: getelementptr
+; OPT-CIVI: load i8, i8 addrspace(4)* %in.gep
+
+; OPT-GFX9: br
+; OPT-GFX9: %sunkaddr = getelementptr i8, i8 addrspace(4)* %in, i64 4095
+; OPT-GFX9: load i8, i8 addrspace(4)* %sunkaddr
+
+; GCN-LABEL: {{^}}test_sink_flat_small_max_flat_offset:
+; GFX9: flat_load_sbyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}} offset:4095{{$}}
+; CIVI: flat_load_sbyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]$}}
+define amdgpu_kernel void @test_sink_flat_small_max_flat_offset(i32 addrspace(4)* %out, i8 addrspace(4)* %in) #1 {
+entry:
+  %out.gep = getelementptr i32, i32 addrspace(4)* %out, i32 1024
+  %in.gep = getelementptr i8, i8 addrspace(4)* %in, i64 4095
+  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
+  %tmp0 = icmp eq i32 %tid, 0
+  br i1 %tmp0, label %endif, label %if
+
+if:
+  %tmp1 = load i8, i8 addrspace(4)* %in.gep
+  %tmp2 = sext i8 %tmp1 to i32
+  br label %endif
+
+endif:
+  %x = phi i32 [ %tmp2, %if ], [ 0, %entry ]
+  store i32 %x, i32 addrspace(4)* %out.gep
+  br label %done
+
+done:
+  ret void
+}
+
+; OPT-LABEL: @test_sink_flat_small_max_plus_1_flat_offset(
+; OPT: %in.gep = getelementptr i8, i8 addrspace(4)* %in, i64 4096
+; OPT: br
+; OPT-NOT: getelementptr
+; OPT: load i8, i8 addrspace(4)* %in.gep
+
+; GCN-LABEL: {{^}}test_sink_flat_small_max_plus_1_flat_offset:
+; GCN: flat_load_sbyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]$}}
+define amdgpu_kernel void @test_sink_flat_small_max_plus_1_flat_offset(i32 addrspace(4)* %out, i8 addrspace(4)* %in) #1 {
+entry:
+  %out.gep = getelementptr i32, i32 addrspace(4)* %out, i64 99999
+  %in.gep = getelementptr i8, i8 addrspace(4)* %in, i64 4096
+  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
+  %tmp0 = icmp eq i32 %tid, 0
+  br i1 %tmp0, label %endif, label %if
+
+if:
+  %tmp1 = load i8, i8 addrspace(4)* %in.gep
+  %tmp2 = sext i8 %tmp1 to i32
+  br label %endif
+
+endif:
+  %x = phi i32 [ %tmp2, %if ], [ 0, %entry ]
+  store i32 %x, i32 addrspace(4)* %out.gep
+  br label %done
+
+done:
+  ret void
+}
+
+; OPT-LABEL: @test_no_sink_flat_reg_offset(
+; OPT: %in.gep = getelementptr i8, i8 addrspace(4)* %in, i64 %reg
+; OPT: br
+
+; OPT-NOT: getelementptr
+; OPT: load i8, i8 addrspace(4)* %in.gep
+
+; GCN-LABEL: {{^}}test_no_sink_flat_reg_offset:
+; GCN: flat_load_sbyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]$}}
+define amdgpu_kernel void @test_no_sink_flat_reg_offset(i32 addrspace(4)* %out, i8 addrspace(4)* %in, i64 %reg) #1 {
+entry:
+  %out.gep = getelementptr i32, i32 addrspace(4)* %out, i32 1024
+  %in.gep = getelementptr i8, i8 addrspace(4)* %in, i64 %reg
+  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
+  %tmp0 = icmp eq i32 %tid, 0
+  br i1 %tmp0, label %endif, label %if
+
+if:
+  %tmp1 = load i8, i8 addrspace(4)* %in.gep
+  %tmp2 = sext i8 %tmp1 to i32
+  br label %endif
+
+endif:
+  %x = phi i32 [ %tmp2, %if ], [ 0, %entry ]
+  store i32 %x, i32 addrspace(4)* %out.gep
+  br label %done
+
+done:
+  ret void
+}
+
+declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
+attributes #2 = { nounwind argmemonly }