[llvm] r239053 - R600/SI: Reimplement isLegalAddressingMode
Matt Arsenault
Matthew.Arsenault at amd.com
Thu Jun 4 09:17:42 PDT 2015
Author: arsenm
Date: Thu Jun 4 11:17:42 2015
New Revision: 239053
URL: http://llvm.org/viewvc/llvm-project?rev=239053&view=rev
Log:
R600/SI: Reimplement isLegalAddressingMode
Now that we sometimes know the address space, this can
theoretically do a better job.
This needs better test coverage, but this mostly depends on
first updating the loop optimizations to provide the address
space.
Added:
llvm/trunk/test/CodeGen/R600/cgp-addressing-modes.ll
Modified:
llvm/trunk/lib/Target/R600/AMDGPU.h
llvm/trunk/lib/Target/R600/SIISelLowering.cpp
Modified: llvm/trunk/lib/Target/R600/AMDGPU.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/AMDGPU.h?rev=239053&r1=239052&r2=239053&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/AMDGPU.h (original)
+++ llvm/trunk/lib/Target/R600/AMDGPU.h Thu Jun 4 11:17:42 2015
@@ -137,7 +137,10 @@ enum AddressSpaces {
CONSTANT_BUFFER_14 = 22,
CONSTANT_BUFFER_15 = 23,
ADDRESS_NONE = 24, ///< Address space for unknown memory.
- LAST_ADDRESS = ADDRESS_NONE
+ LAST_ADDRESS = ADDRESS_NONE,
+
+ // Some places use this if the address space can't be determined.
+ UNKNOWN_ADDRESS_SPACE = ~0u
};
} // namespace AMDGPUAS
Modified: llvm/trunk/lib/Target/R600/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/SIISelLowering.cpp?rev=239053&r1=239052&r2=239053&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/R600/SIISelLowering.cpp Thu Jun 4 11:17:42 2015
@@ -250,47 +250,83 @@ bool SITargetLowering::isShuffleMaskLega
return false;
}
-// FIXME: This really needs an address space argument. The immediate offset
-// size is different for different sets of memory instruction sets.
-
-// The single offset DS instructions have a 16-bit unsigned byte offset.
-//
-// MUBUF / MTBUF have a 12-bit unsigned byte offset, and additionally can do r +
-// r + i with addr64. 32-bit has more addressing mode options. Depending on the
-// resource constant, it can also do (i64 r0) + (i32 r1) * (i14 i).
-//
-// SMRD instructions have an 8-bit, dword offset.
-//
bool SITargetLowering::isLegalAddressingMode(const AddrMode &AM,
Type *Ty, unsigned AS) const {
// No global is ever allowed as a base.
if (AM.BaseGV)
return false;
- // Allow a 16-bit unsigned immediate field, since this is what DS instructions
- // use.
- if (!isUInt<16>(AM.BaseOffs))
- return false;
+ switch (AS) {
+ case AMDGPUAS::GLOBAL_ADDRESS:
+ case AMDGPUAS::CONSTANT_ADDRESS: // XXX - Should we assume SMRD instructions?
+ case AMDGPUAS::PRIVATE_ADDRESS:
+ case AMDGPUAS::UNKNOWN_ADDRESS_SPACE: {
+ // MUBUF / MTBUF instructions have a 12-bit unsigned byte offset, and
+ // additionally can do r + r + i with addr64. 32-bit has more addressing
+ // mode options. Depending on the resource constant, it can also do
+ // (i64 r0) + (i32 r1) * (i14 i).
+ //
+ // SMRD instructions have an 8-bit, dword offset.
+ //
+  // Assume nonuniform access, since the address space isn't enough to know
+  // what instruction we will use, and since we don't know if this is a load
+  // or store and scalar stores are only available on VI.
+ //
+ // We also know if we are doing an extload, we can't do a scalar load.
+ //
+ // Private arrays end up using a scratch buffer most of the time, so also
+ // assume those use MUBUF instructions. Scratch loads / stores are currently
+ // implemented as mubuf instructions with offen bit set, so slightly
+ // different than the normal addr64.
+ if (!isUInt<12>(AM.BaseOffs))
+ return false;
- // Only support r+r,
- switch (AM.Scale) {
- case 0: // "r+i" or just "i", depending on HasBaseReg.
- break;
- case 1:
- if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
+ // FIXME: Since we can split immediate into soffset and immediate offset,
+ // would it make sense to allow any immediate?
+
+ switch (AM.Scale) {
+ case 0: // r + i or just i, depending on HasBaseReg.
+ return true;
+ case 1:
+ return true; // We have r + r or r + i.
+ case 2:
+ if (AM.HasBaseReg) {
+ // Reject 2 * r + r.
+ return false;
+ }
+
+ // Allow 2 * r as r + r
+ // Or 2 * r + i is allowed as r + r + i.
+ return true;
+ default: // Don't allow n * r
return false;
- // Otherwise we have r+r or r+i.
- break;
- case 2:
- if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
+ }
+ }
+ case AMDGPUAS::LOCAL_ADDRESS:
+ case AMDGPUAS::REGION_ADDRESS: {
+ // Basic, single offset DS instructions allow a 16-bit unsigned immediate
+ // field.
+ // XXX - If doing a 4-byte aligned 8-byte type access, we effectively have
+ // an 8-bit dword offset but we don't know the alignment here.
+ if (!isUInt<16>(AM.BaseOffs))
return false;
- // Allow 2*r as r+r.
- break;
- default: // Don't allow n * r
+
+ if (AM.Scale == 0) // r + i or just i, depending on HasBaseReg.
+ return true;
+
+ if (AM.Scale == 1 && AM.HasBaseReg)
+ return true;
+
return false;
}
-
- return true;
+ case AMDGPUAS::FLAT_ADDRESS: {
+ // Flat instructions do not have offsets, and only have the register
+ // address.
+ return AM.BaseOffs == 0 && (AM.Scale == 0 || AM.Scale == 1);
+ }
+ default:
+ llvm_unreachable("unhandled address space");
+ }
}
bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
Added: llvm/trunk/test/CodeGen/R600/cgp-addressing-modes.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/cgp-addressing-modes.ll?rev=239053&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/cgp-addressing-modes.ll (added)
+++ llvm/trunk/test/CodeGen/R600/cgp-addressing-modes.ll Thu Jun 4 11:17:42 2015
@@ -0,0 +1,242 @@
+; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown < %s | FileCheck -check-prefix=OPT %s
+; RUN: llc -march=amdgcn -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN %s
+
+declare i32 @llvm.r600.read.tidig.x() #0
+
+; OPT-LABEL: @test_sink_global_small_offset_i32(
+; OPT-NOT: getelementptr i32, i32 addrspace(1)* %in
+; OPT: br i1
+; OPT: ptrtoint
+
+; GCN-LABEL: {{^}}test_sink_global_small_offset_i32:
+; GCN: {{^}}BB0_2:
+define void @test_sink_global_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %cond) {
+entry:
+ %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
+ %in.gep = getelementptr i32, i32 addrspace(1)* %in, i64 7
+ %tmp0 = icmp eq i32 %cond, 0
+ br i1 %tmp0, label %endif, label %if
+
+if:
+ %tmp1 = load i32, i32 addrspace(1)* %in.gep
+ br label %endif
+
+endif:
+ %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
+ store i32 %x, i32 addrspace(1)* %out.gep
+ br label %done
+
+done:
+ ret void
+}
+
+; OPT-LABEL: @test_sink_global_small_max_i32_ds_offset(
+; OPT: %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 65535
+; OPT: br i1
+
+; GCN-LABEL: {{^}}test_sink_global_small_max_i32_ds_offset:
+; GCN: s_and_saveexec_b64
+; GCN: buffer_load_sbyte {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}}
+; GCN: {{^}}BB1_2:
+; GCN: s_or_b64 exec
+define void @test_sink_global_small_max_i32_ds_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in, i32 %cond) {
+entry:
+ %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 99999
+ %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 65535
+ %tmp0 = icmp eq i32 %cond, 0
+ br i1 %tmp0, label %endif, label %if
+
+if:
+ %tmp1 = load i8, i8 addrspace(1)* %in.gep
+ %tmp2 = sext i8 %tmp1 to i32
+ br label %endif
+
+endif:
+ %x = phi i32 [ %tmp2, %if ], [ 0, %entry ]
+ store i32 %x, i32 addrspace(1)* %out.gep
+ br label %done
+
+done:
+ ret void
+}
+
+; GCN-LABEL: {{^}}test_sink_global_small_max_mubuf_offset:
+; GCN: s_and_saveexec_b64
+; GCN: buffer_load_sbyte {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:4095{{$}}
+; GCN: {{^}}BB2_2:
+; GCN: s_or_b64 exec
+define void @test_sink_global_small_max_mubuf_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in, i32 %cond) {
+entry:
+ %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 1024
+ %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 4095
+ %tmp0 = icmp eq i32 %cond, 0
+ br i1 %tmp0, label %endif, label %if
+
+if:
+ %tmp1 = load i8, i8 addrspace(1)* %in.gep
+ %tmp2 = sext i8 %tmp1 to i32
+ br label %endif
+
+endif:
+ %x = phi i32 [ %tmp2, %if ], [ 0, %entry ]
+ store i32 %x, i32 addrspace(1)* %out.gep
+ br label %done
+
+done:
+ ret void
+}
+
+; GCN-LABEL: {{^}}test_sink_global_small_max_plus_1_mubuf_offset:
+; GCN: s_and_saveexec_b64
+; GCN: buffer_load_sbyte {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}}
+; GCN: {{^}}BB3_2:
+; GCN: s_or_b64 exec
+define void @test_sink_global_small_max_plus_1_mubuf_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in, i32 %cond) {
+entry:
+ %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 99999
+ %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 4096
+ %tmp0 = icmp eq i32 %cond, 0
+ br i1 %tmp0, label %endif, label %if
+
+if:
+ %tmp1 = load i8, i8 addrspace(1)* %in.gep
+ %tmp2 = sext i8 %tmp1 to i32
+ br label %endif
+
+endif:
+ %x = phi i32 [ %tmp2, %if ], [ 0, %entry ]
+ store i32 %x, i32 addrspace(1)* %out.gep
+ br label %done
+
+done:
+ ret void
+}
+
+; OPT-LABEL: @test_no_sink_flat_small_offset_i32(
+; OPT: getelementptr i32, i32 addrspace(4)* %in
+; OPT: br i1
+; OPT-NOT: ptrtoint
+
+; GCN-LABEL: {{^}}test_no_sink_flat_small_offset_i32:
+; GCN: flat_load_dword
+; GCN: {{^}}BB4_2:
+
+define void @test_no_sink_flat_small_offset_i32(i32 addrspace(4)* %out, i32 addrspace(4)* %in, i32 %cond) {
+entry:
+ %out.gep = getelementptr i32, i32 addrspace(4)* %out, i64 999999
+ %in.gep = getelementptr i32, i32 addrspace(4)* %in, i64 7
+ %tmp0 = icmp eq i32 %cond, 0
+ br i1 %tmp0, label %endif, label %if
+
+if:
+ %tmp1 = load i32, i32 addrspace(4)* %in.gep
+ br label %endif
+
+endif:
+ %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
+ store i32 %x, i32 addrspace(4)* %out.gep
+ br label %done
+
+done:
+ ret void
+}
+
+; OPT-LABEL: @test_sink_scratch_small_offset_i32(
+; OPT-NOT: getelementptr [512 x i32]
+; OPT: br i1
+; OPT: ptrtoint
+
+; GCN-LABEL: {{^}}test_sink_scratch_small_offset_i32:
+; GCN: s_and_saveexec_b64
+; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:4092{{$}}
+; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:4092{{$}}
+; GCN: {{^}}BB5_2:
+define void @test_sink_scratch_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %cond, i32 %arg) {
+entry:
+ %alloca = alloca [512 x i32], align 4
+ %out.gep.0 = getelementptr i32, i32 addrspace(1)* %out, i64 999998
+ %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i64 999999
+ %add.arg = add i32 %arg, 8
+ %alloca.gep = getelementptr [512 x i32], [512 x i32]* %alloca, i32 0, i32 1023
+ %tmp0 = icmp eq i32 %cond, 0
+ br i1 %tmp0, label %endif, label %if
+
+if:
+ store volatile i32 123, i32* %alloca.gep
+ %tmp1 = load volatile i32, i32* %alloca.gep
+ br label %endif
+
+endif:
+ %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
+ store i32 %x, i32 addrspace(1)* %out.gep.0
+ %load = load volatile i32, i32* %alloca.gep
+ store i32 %load, i32 addrspace(1)* %out.gep.1
+ br label %done
+
+done:
+ ret void
+}
+
+; OPT-LABEL: @test_no_sink_scratch_large_offset_i32(
+; OPT: %alloca.gep = getelementptr [512 x i32], [512 x i32]* %alloca, i32 0, i32 1024
+; OPT: br i1
+; OPT-NOT: ptrtoint
+
+; GCN-LABEL: {{^}}test_no_sink_scratch_large_offset_i32:
+; GCN: s_and_saveexec_b64
+; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
+; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
+; GCN: {{^}}BB6_2:
+define void @test_no_sink_scratch_large_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %cond, i32 %arg) {
+entry:
+ %alloca = alloca [512 x i32], align 4
+ %out.gep.0 = getelementptr i32, i32 addrspace(1)* %out, i64 999998
+ %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i64 999999
+ %add.arg = add i32 %arg, 8
+ %alloca.gep = getelementptr [512 x i32], [512 x i32]* %alloca, i32 0, i32 1024
+ %tmp0 = icmp eq i32 %cond, 0
+ br i1 %tmp0, label %endif, label %if
+
+if:
+ store volatile i32 123, i32* %alloca.gep
+ %tmp1 = load volatile i32, i32* %alloca.gep
+ br label %endif
+
+endif:
+ %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
+ store i32 %x, i32 addrspace(1)* %out.gep.0
+ %load = load volatile i32, i32* %alloca.gep
+ store i32 %load, i32 addrspace(1)* %out.gep.1
+ br label %done
+
+done:
+ ret void
+}
+
+; GCN-LABEL: {{^}}test_sink_global_vreg_sreg_i32:
+; GCN: s_and_saveexec_b64
+; GCN: buffer_load_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; GCN: {{^}}BB7_2:
+define void @test_sink_global_vreg_sreg_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %offset, i32 %cond) {
+entry:
+ %offset.ext = zext i32 %offset to i64
+ %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
+ %in.gep = getelementptr i32, i32 addrspace(1)* %in, i64 %offset.ext
+ %tmp0 = icmp eq i32 %cond, 0
+ br i1 %tmp0, label %endif, label %if
+
+if:
+ %tmp1 = load i32, i32 addrspace(1)* %in.gep
+ br label %endif
+
+endif:
+ %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
+ store i32 %x, i32 addrspace(1)* %out.gep
+ br label %done
+
+done:
+ ret void
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
More information about the llvm-commits
mailing list