[llvm] r305203 - AMDGPU: Teach isLegalAddressingMode about flat offsets
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 12 10:06:35 PDT 2017
Author: arsenm
Date: Mon Jun 12 12:06:35 2017
New Revision: 305203
URL: http://llvm.org/viewvc/llvm-project?rev=305203&view=rev
Log:
AMDGPU: Teach isLegalAddressingMode about flat offsets
Also fix r+r being reported as a valid flat addressing mode: only a single
base register (plus an immediate offset on GFX9) is legal.
Modified:
llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/trunk/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll
Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=305203&r1=305202&r2=305203&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Mon Jun 12 12:06:35 2017
@@ -567,9 +567,17 @@ bool SITargetLowering::getAddrModeArgume
}
bool SITargetLowering::isLegalFlatAddressingMode(const AddrMode &AM) const {
- // Flat instructions do not have offsets, and only have the register
- // address.
- return AM.BaseOffs == 0 && (AM.Scale == 0 || AM.Scale == 1);
+ if (!Subtarget->hasFlatInstOffsets()) {
+ // Flat instructions do not have offsets, and only have the register
+ // address.
+ return AM.BaseOffs == 0 && AM.Scale == 0;
+ }
+
+ // GFX9 added a 13-bit signed offset. When using regular flat instructions,
+ // the sign bit is ignored and is treated as a 12-bit unsigned offset.
+
+ // Just r + i
+ return isUInt<12>(AM.BaseOffs) && AM.Scale == 0;
}
bool SITargetLowering::isLegalMUBUFAddressingMode(const AddrMode &AM) const {
Modified: llvm/trunk/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll?rev=305203&r1=305202&r2=305203&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll Mon Jun 12 12:06:35 2017
@@ -1,12 +1,19 @@
-; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=bonaire < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-CI %s
-; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-VI %s
-; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
+; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=bonaire < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-CI -check-prefix=OPT-CIVI %s
+; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-VI -check-prefix=OPT-CIVI %s
+; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=gfx900 -mattr=-flat-for-global < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-GFX9 %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=CIVI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=CIVI %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
; OPT-LABEL: @test_no_sink_flat_small_offset_i32(
-; OPT: getelementptr i32, i32 addrspace(4)* %in
-; OPT: br i1
-; OPT-NOT: ptrtoint
+; OPT-CIVI: getelementptr i32, i32 addrspace(4)* %in
+; OPT-CIVI: br i1
+; OPT-CIVI-NOT: ptrtoint
+
+; OPT-GFX9: br
+; OPT-GFX9: %sunkaddr = getelementptr i8, i8 addrspace(4)* %0, i64 28
+; OPT-GFX9: %1 = bitcast i8 addrspace(4)* %sunkaddr to i32 addrspace(4)*
+; OPT-GFX9: load i32, i32 addrspace(4)* %1
; GCN-LABEL: {{^}}test_no_sink_flat_small_offset_i32:
; GCN: flat_load_dword
@@ -96,3 +103,105 @@ endif:
done:
ret void
}
+
+; OPT-LABEL: @test_sink_flat_small_max_flat_offset(
+; OPT-CIVI: %in.gep = getelementptr i8, i8 addrspace(4)* %in, i64 4095
+; OPT-CIVI: br
+; OPT-CIVI-NOT: getelementptr
+; OPT-CIVI: load i8, i8 addrspace(4)* %in.gep
+
+; OPT-GFX9: br
+; OPT-GFX9: %sunkaddr = getelementptr i8, i8 addrspace(4)* %in, i64 4095
+; OPT-GFX9: load i8, i8 addrspace(4)* %sunkaddr
+
+; GCN-LABEL: {{^}}test_sink_flat_small_max_flat_offset:
+; GFX9: flat_load_sbyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}} offset:4095{{$}}
+; CIVI: flat_load_sbyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]$}}
+define amdgpu_kernel void @test_sink_flat_small_max_flat_offset(i32 addrspace(4)* %out, i8 addrspace(4)* %in) #1 {
+entry:
+ %out.gep = getelementptr i32, i32 addrspace(4)* %out, i32 1024
+ %in.gep = getelementptr i8, i8 addrspace(4)* %in, i64 4095
+ %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
+ %tmp0 = icmp eq i32 %tid, 0
+ br i1 %tmp0, label %endif, label %if
+
+if:
+ %tmp1 = load i8, i8 addrspace(4)* %in.gep
+ %tmp2 = sext i8 %tmp1 to i32
+ br label %endif
+
+endif:
+ %x = phi i32 [ %tmp2, %if ], [ 0, %entry ]
+ store i32 %x, i32 addrspace(4)* %out.gep
+ br label %done
+
+done:
+ ret void
+}
+
+; OPT-LABEL: @test_sink_flat_small_max_plus_1_flat_offset(
+; OPT: %in.gep = getelementptr i8, i8 addrspace(4)* %in, i64 4096
+; OPT: br
+; OPT-NOT: getelementptr
+; OPT: load i8, i8 addrspace(4)* %in.gep
+
+; GCN-LABEL: {{^}}test_sink_flat_small_max_plus_1_flat_offset:
+; GCN: flat_load_sbyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]$}}
+define amdgpu_kernel void @test_sink_flat_small_max_plus_1_flat_offset(i32 addrspace(4)* %out, i8 addrspace(4)* %in) #1 {
+entry:
+ %out.gep = getelementptr i32, i32 addrspace(4)* %out, i64 99999
+ %in.gep = getelementptr i8, i8 addrspace(4)* %in, i64 4096
+ %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
+ %tmp0 = icmp eq i32 %tid, 0
+ br i1 %tmp0, label %endif, label %if
+
+if:
+ %tmp1 = load i8, i8 addrspace(4)* %in.gep
+ %tmp2 = sext i8 %tmp1 to i32
+ br label %endif
+
+endif:
+ %x = phi i32 [ %tmp2, %if ], [ 0, %entry ]
+ store i32 %x, i32 addrspace(4)* %out.gep
+ br label %done
+
+done:
+ ret void
+}
+
+; OPT-LABEL: @test_no_sink_flat_reg_offset(
+; OPT: %in.gep = getelementptr i8, i8 addrspace(4)* %in, i64 %reg
+; OPT: br
+
+; OPT-NOT: getelementptr
+; OPT: load i8, i8 addrspace(4)* %in.gep
+
+; GCN-LABEL: {{^}}test_no_sink_flat_reg_offset:
+; GCN: flat_load_sbyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]$}}
+define amdgpu_kernel void @test_no_sink_flat_reg_offset(i32 addrspace(4)* %out, i8 addrspace(4)* %in, i64 %reg) #1 {
+entry:
+ %out.gep = getelementptr i32, i32 addrspace(4)* %out, i32 1024
+ %in.gep = getelementptr i8, i8 addrspace(4)* %in, i64 %reg
+ %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
+ %tmp0 = icmp eq i32 %tid, 0
+ br i1 %tmp0, label %endif, label %if
+
+if:
+ %tmp1 = load i8, i8 addrspace(4)* %in.gep
+ %tmp2 = sext i8 %tmp1 to i32
+ br label %endif
+
+endif:
+ %x = phi i32 [ %tmp2, %if ], [ 0, %entry ]
+ store i32 %x, i32 addrspace(4)* %out.gep
+ br label %done
+
+done:
+ ret void
+}
+
+declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
+attributes #2 = { nounwind argmemonly }
More information about the llvm-commits
mailing list