[llvm] r239053 - R600/SI: Reimplement isLegalAddressingMode

Mon Jun 8 14:37:21 PDT 2015

This broke the Windows self-host:
http://lab.llvm.org:8011/builders/clang-x86-win2008-selfhost/builds/1279

+  // Some places use this if the address space can't be determined.
+  UNKNOWN_ADDRESS_SPACE = ~0u

On Windows, enums are always ints unless you say otherwise, so clang-cl
gave us this:

..\lib\Target\R600/AMDGPU.h(143,3) :  warning: enumerator value is not
representable in the underlying type 'int' [-Wmicrosoft]
  UNKNOWN_ADDRESS_SPACE = ~0u
  ^
..\lib\Target\R600\SIISelLowering.cpp(263,8) :  error: case value evaluates
to -1, which cannot be narrowed to type 'unsigned int' [-Wc++11-narrowing]
  case AMDGPUAS::UNKNOWN_ADDRESS_SPACE: {
       ^

I think MSVC doesn't implement C++11 narrowing checks. I'll have a fix soon.

On Thu, Jun 4, 2015 at 9:17 AM, Matt Arsenault <Matthew.Arsenault at amd.com>
wrote:

> Author: arsenm
> Date: Thu Jun  4 11:17:42 2015
> New Revision: 239053
>
> URL: http://llvm.org/viewvc/llvm-project?rev=239053&view=rev
> Log:
> R600/SI: Reimplement isLegalAddressingMode
>
> Now that we sometimes know the address space, this can
> theoretically do a better job.
>
> This needs better test coverage, but this mostly depends on
> first updating the loop optimizatiosn to provide the address
> space.
>
> Added:
>     llvm/trunk/test/CodeGen/R600/cgp-addressing-modes.ll
> Modified:
>     llvm/trunk/lib/Target/R600/AMDGPU.h
>     llvm/trunk/lib/Target/R600/SIISelLowering.cpp
>
> Modified: llvm/trunk/lib/Target/R600/AMDGPU.h
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/AMDGPU.h?rev=239053&r1=239052&r2=239053&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/R600/AMDGPU.h (original)
> +++ llvm/trunk/lib/Target/R600/AMDGPU.h Thu Jun  4 11:17:42 2015
> @@ -137,7 +137,10 @@ enum AddressSpaces {
>    CONSTANT_BUFFER_14 = 22,
>    CONSTANT_BUFFER_15 = 23,
>    ADDRESS_NONE = 24, ///< Address space for unknown memory.
> -  LAST_ADDRESS = ADDRESS_NONE
> +  LAST_ADDRESS = ADDRESS_NONE,
> +
> +  // Some places use this if the address space can't be determined.
> +  UNKNOWN_ADDRESS_SPACE = ~0u
>  };
>
>  } // namespace AMDGPUAS
>
> Modified: llvm/trunk/lib/Target/R600/SIISelLowering.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/SIISelLowering.cpp?rev=239053&r1=239052&r2=239053&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/R600/SIISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/R600/SIISelLowering.cpp Thu Jun  4 11:17:42 2015
> @@ -250,47 +250,83 @@ bool SITargetLowering::isShuffleMaskLega
>    return false;
>  }
>
> -// FIXME: This really needs an address space argument. The immediate
> offset
> -// size is different for different sets of memory instruction sets.
> -
> -// The single offset DS instructions have a 16-bit unsigned byte offset.
> -//
> -// MUBUF / MTBUF have a 12-bit unsigned byte offset, and additionally can
> do r +
> -// r + i with addr64. 32-bit has more addressing mode options. Depending
> on the
> -// resource constant, it can also do (i64 r0) + (i32 r1) * (i14 i).
> -//
> -// SMRD instructions have an 8-bit, dword offset.
> -//
>  bool SITargetLowering::isLegalAddressingMode(const AddrMode &AM,
>                                               Type *Ty, unsigned AS) const
> {
>    // No global is ever allowed as a base.
>    if (AM.BaseGV)
>      return false;
>
> -  // Allow a 16-bit unsigned immediate field, since this is what DS
> instructions
> -  // use.
> -  if (!isUInt<16>(AM.BaseOffs))
> -    return false;
> +  switch (AS) {
> +  case AMDGPUAS::GLOBAL_ADDRESS:
> +  case AMDGPUAS::CONSTANT_ADDRESS: // XXX - Should we assume SMRD
> instructions?
> +  case AMDGPUAS::PRIVATE_ADDRESS:
> +  case AMDGPUAS::UNKNOWN_ADDRESS_SPACE: {
> +    // MUBUF / MTBUF instructions have a 12-bit unsigned byte offset, and
> +    // additionally can do r + r + i with addr64. 32-bit has more
> addressing
> +    // mode options. Depending on the resource constant, it can also do
> +    // (i64 r0) + (i32 r1) * (i14 i).
> +    //
> +    // SMRD instructions have an 8-bit, dword offset.
> +    //
> +    // Assume nonunifom access, since the address space isn't enough to
> know
> +    // what instruction we will use, and since we don't know if this is a
> load
> +    // or store and scalar stores are only available on VI.
> +    //
> +    // We also know if we are doing an extload, we can't do a scalar load.
> +    //
> +    // Private arrays end up using a scratch buffer most of the time, so
> also
> +    // assume those use MUBUF instructions. Scratch loads / stores are
> currently
> +    // implemented as mubuf instructions with offen bit set, so slightly
> +    // different than the normal addr64.
> +    if (!isUInt<12>(AM.BaseOffs))
> +      return false;
>
> -  // Only support r+r,
> -  switch (AM.Scale) {
> -  case 0:  // "r+i" or just "i", depending on HasBaseReg.
> -    break;
> -  case 1:
> -    if (AM.HasBaseReg && AM.BaseOffs)  // "r+r+i" is not allowed.
> +    // FIXME: Since we can split immediate into soffset and immediate
> offset,
> +    // would it make sense to allow any immediate?
> +
> +    switch (AM.Scale) {
> +    case 0: // r + i or just i, depending on HasBaseReg.
> +      return true;
> +    case 1:
> +      return true; // We have r + r or r + i.
> +    case 2:
> +      if (AM.HasBaseReg) {
> +        // Reject 2 * r + r.
> +        return false;
> +      }
> +
> +      // Allow 2 * r as r + r
> +      // Or  2 * r + i is allowed as r + r + i.
> +      return true;
> +    default: // Don't allow n * r
>        return false;
> -    // Otherwise we have r+r or r+i.
> -    break;
> -  case 2:
> -    if (AM.HasBaseReg || AM.BaseOffs)  // 2*r+r  or  2*r+i is not allowed.
> +    }
> +  }
> +  case AMDGPUAS::LOCAL_ADDRESS:
> +  case AMDGPUAS::REGION_ADDRESS: {
> +    // Basic, single offset DS instructions allow a 16-bit unsigned
> immediate
> +    // field.
> +    // XXX - If doing a 4-byte aligned 8-byte type access, we effectively
> have
> +    // an 8-bit dword offset but we don't know the alignment here.
> +    if (!isUInt<16>(AM.BaseOffs))
>        return false;
> -    // Allow 2*r as r+r.
> -    break;
> -  default: // Don't allow n * r
> +
> +    if (AM.Scale == 0) // r + i or just i, depending on HasBaseReg.
> +      return true;
> +
> +    if (AM.Scale == 1 && AM.HasBaseReg)
> +      return true;
> +
>      return false;
>    }
> -
> -  return true;
> +  case AMDGPUAS::FLAT_ADDRESS: {
> +    // Flat instructions do not have offsets, and only have the register
> +    // address.
> +    return AM.BaseOffs == 0 && (AM.Scale == 0 || AM.Scale == 1);
> +  }
> +  default:
> +    llvm_unreachable("unhandled address space");
> +  }
>  }
>
>  bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
>
> Added: llvm/trunk/test/CodeGen/R600/cgp-addressing-modes.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/cgp-addressing-modes.ll?rev=239053&view=auto
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/R600/cgp-addressing-modes.ll (added)
> +++ llvm/trunk/test/CodeGen/R600/cgp-addressing-modes.ll Thu Jun  4
> 11:17:42 2015
> @@ -0,0 +1,242 @@
> +; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown < %s |
> FileCheck -check-prefix=OPT %s
> +; RUN: llc -march=amdgcn -mattr=-promote-alloca < %s | FileCheck
> -check-prefix=GCN %s
> +
> +declare i32 @llvm.r600.read.tidig.x() #0
> +
> +; OPT-LABEL: @test_sink_global_small_offset_i32(
> +; OPT-NOT: getelementptr i32, i32 addrspace(1)* %in
> +; OPT: br i1
> +; OPT: ptrtoint
> +
> +; GCN-LABEL: {{^}}test_sink_global_small_offset_i32:
> +; GCN: {{^}}BB0_2:
> +define void @test_sink_global_small_offset_i32(i32 addrspace(1)* %out,
> i32 addrspace(1)* %in, i32 %cond) {
> +entry:
> +  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
> +  %in.gep = getelementptr i32, i32 addrspace(1)* %in, i64 7
> +  %tmp0 = icmp eq i32 %cond, 0
> +  br i1 %tmp0, label %endif, label %if
> +
> +if:
> +  %tmp1 = load i32, i32 addrspace(1)* %in.gep
> +  br label %endif
> +
> +endif:
> +  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
> +  store i32 %x, i32 addrspace(1)* %out.gep
> +  br label %done
> +
> +done:
> +  ret void
> +}
> +
> +; OPT-LABEL: @test_sink_global_small_max_i32_ds_offset(
> +; OPT: %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 65535
> +; OPT: br i1
> +
> +; GCN-LABEL: {{^}}test_sink_global_small_max_i32_ds_offset:
> +; GCN: s_and_saveexec_b64
> +; GCN: buffer_load_sbyte {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}}
> +; GCN: {{^}}BB1_2:
> +; GCN: s_or_b64 exec
> +define void @test_sink_global_small_max_i32_ds_offset(i32 addrspace(1)*
> %out, i8 addrspace(1)* %in, i32 %cond) {
> +entry:
> +  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 99999
> +  %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 65535
> +  %tmp0 = icmp eq i32 %cond, 0
> +  br i1 %tmp0, label %endif, label %if
> +
> +if:
> +  %tmp1 = load i8, i8 addrspace(1)* %in.gep
> +  %tmp2 = sext i8 %tmp1 to i32
> +  br label %endif
> +
> +endif:
> +  %x = phi i32 [ %tmp2, %if ], [ 0, %entry ]
> +  store i32 %x, i32 addrspace(1)* %out.gep
> +  br label %done
> +
> +done:
> +  ret void
> +}
> +
> +; GCN-LABEL: {{^}}test_sink_global_small_max_mubuf_offset:
> +; GCN: s_and_saveexec_b64
> +; GCN: buffer_load_sbyte {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0
> offset:4095{{$}}
> +; GCN: {{^}}BB2_2:
> +; GCN: s_or_b64 exec
> +define void @test_sink_global_small_max_mubuf_offset(i32 addrspace(1)*
> %out, i8 addrspace(1)* %in, i32 %cond) {
> +entry:
> +  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 1024
> +  %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 4095
> +  %tmp0 = icmp eq i32 %cond, 0
> +  br i1 %tmp0, label %endif, label %if
> +
> +if:
> +  %tmp1 = load i8, i8 addrspace(1)* %in.gep
> +  %tmp2 = sext i8 %tmp1 to i32
> +  br label %endif
> +
> +endif:
> +  %x = phi i32 [ %tmp2, %if ], [ 0, %entry ]
> +  store i32 %x, i32 addrspace(1)* %out.gep
> +  br label %done
> +
> +done:
> +  ret void
> +}
> +
> +; GCN-LABEL: {{^}}test_sink_global_small_max_plus_1_mubuf_offset:
> +; GCN: s_and_saveexec_b64
> +; GCN: buffer_load_sbyte {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}}
> +; GCN: {{^}}BB3_2:
> +; GCN: s_or_b64 exec
> +define void @test_sink_global_small_max_plus_1_mubuf_offset(i32
> addrspace(1)* %out, i8 addrspace(1)* %in, i32 %cond) {
> +entry:
> +  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 99999
> +  %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 4096
> +  %tmp0 = icmp eq i32 %cond, 0
> +  br i1 %tmp0, label %endif, label %if
> +
> +if:
> +  %tmp1 = load i8, i8 addrspace(1)* %in.gep
> +  %tmp2 = sext i8 %tmp1 to i32
> +  br label %endif
> +
> +endif:
> +  %x = phi i32 [ %tmp2, %if ], [ 0, %entry ]
> +  store i32 %x, i32 addrspace(1)* %out.gep
> +  br label %done
> +
> +done:
> +  ret void
> +}
> +
> +; OPT-LABEL: @test_no_sink_flat_small_offset_i32(
> +; OPT: getelementptr i32, i32 addrspace(4)* %in
> +; OPT: br i1
> +; OPT-NOT: ptrtoint
> +
> +; GCN-LABEL: {{^}}test_no_sink_flat_small_offset_i32:
> +; GCN: flat_load_dword
> +; GCN: {{^}}BB4_2:
> +
> +define void @test_no_sink_flat_small_offset_i32(i32 addrspace(4)* %out,
> i32 addrspace(4)* %in, i32 %cond) {
> +entry:
> +  %out.gep = getelementptr i32, i32 addrspace(4)* %out, i64 999999
> +  %in.gep = getelementptr i32, i32 addrspace(4)* %in, i64 7
> +  %tmp0 = icmp eq i32 %cond, 0
> +  br i1 %tmp0, label %endif, label %if
> +
> +if:
> +  %tmp1 = load i32, i32 addrspace(4)* %in.gep
> +  br label %endif
> +
> +endif:
> +  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
> +  store i32 %x, i32 addrspace(4)* %out.gep
> +  br label %done
> +
> +done:
> +  ret void
> +}
> +
> +; OPT-LABEL: @test_sink_scratch_small_offset_i32(
> +; OPT-NOT:  getelementptr [512 x i32]
> +; OPT: br i1
> +; OPT: ptrtoint
> +
> +; GCN-LABEL: {{^}}test_sink_scratch_small_offset_i32:
> +; GCN: s_and_saveexec_b64
> +; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}},
> {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:4092{{$}}
> +; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}},
> {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:4092{{$}}
> +; GCN: {{^}}BB5_2:
> +define void @test_sink_scratch_small_offset_i32(i32 addrspace(1)* %out,
> i32 addrspace(1)* %in, i32 %cond, i32 %arg) {
> +entry:
> +  %alloca = alloca [512 x i32], align 4
> +  %out.gep.0 = getelementptr i32, i32 addrspace(1)* %out, i64 999998
> +  %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i64 999999
> +  %add.arg = add i32 %arg, 8
> +  %alloca.gep = getelementptr [512 x i32], [512 x i32]* %alloca, i32 0,
> i32 1023
> +  %tmp0 = icmp eq i32 %cond, 0
> +  br i1 %tmp0, label %endif, label %if
> +
> +if:
> +  store volatile i32 123, i32* %alloca.gep
> +  %tmp1 = load volatile i32, i32* %alloca.gep
> +  br label %endif
> +
> +endif:
> +  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
> +  store i32 %x, i32 addrspace(1)* %out.gep.0
> +  %load = load volatile i32, i32* %alloca.gep
> +  store i32 %load, i32 addrspace(1)* %out.gep.1
> +  br label %done
> +
> +done:
> +  ret void
> +}
> +
> +; OPT-LABEL: @test_no_sink_scratch_large_offset_i32(
> +; OPT: %alloca.gep = getelementptr [512 x i32], [512 x i32]* %alloca, i32
> 0, i32 1024
> +; OPT: br i1
> +; OPT-NOT: ptrtoint
> +
> +; GCN-LABEL: {{^}}test_no_sink_scratch_large_offset_i32:
> +; GCN: s_and_saveexec_b64
> +; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}},
> {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
> +; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}},
> {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
> +; GCN: {{^}}BB6_2:
> +define void @test_no_sink_scratch_large_offset_i32(i32 addrspace(1)*
> %out, i32 addrspace(1)* %in, i32 %cond, i32 %arg) {
> +entry:
> +  %alloca = alloca [512 x i32], align 4
> +  %out.gep.0 = getelementptr i32, i32 addrspace(1)* %out, i64 999998
> +  %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i64 999999
> +  %add.arg = add i32 %arg, 8
> +  %alloca.gep = getelementptr [512 x i32], [512 x i32]* %alloca, i32 0,
> i32 1024
> +  %tmp0 = icmp eq i32 %cond, 0
> +  br i1 %tmp0, label %endif, label %if
> +
> +if:
> +  store volatile i32 123, i32* %alloca.gep
> +  %tmp1 = load volatile i32, i32* %alloca.gep
> +  br label %endif
> +
> +endif:
> +  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
> +  store i32 %x, i32 addrspace(1)* %out.gep.0
> +  %load = load volatile i32, i32* %alloca.gep
> +  store i32 %load, i32 addrspace(1)* %out.gep.1
> +  br label %done
> +
> +done:
> +  ret void
> +}
> +
> +; GCN-LABEL: {{^}}test_sink_global_vreg_sreg_i32:
> +; GCN: s_and_saveexec_b64
> +; GCN: buffer_load_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}},
> {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
> +; GCN: {{^}}BB7_2:
> +define void @test_sink_global_vreg_sreg_i32(i32 addrspace(1)* %out, i32
> addrspace(1)* %in, i32 %offset, i32 %cond) {
> +entry:
> +  %offset.ext = zext i32 %offset to i64
> +  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
> +  %in.gep = getelementptr i32, i32 addrspace(1)* %in, i64 %offset.ext
> +  %tmp0 = icmp eq i32 %cond, 0
> +  br i1 %tmp0, label %endif, label %if
> +
> +if:
> +  %tmp1 = load i32, i32 addrspace(1)* %in.gep
> +  br label %endif
> +
> +endif:
> +  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
> +  store i32 %x, i32 addrspace(1)* %out.gep
> +  br label %done
> +
> +done:
> +  ret void
> +}
> +
> +attributes #0 = { nounwind readnone }
> +attributes #1 = { nounwind }
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20150608/6771c86e/attachment.html>