[PATCH] R600: Error on initializer for LDS.

Tom Stellard tom at stellard.net
Thu Nov 13 09:07:35 PST 2014


On Mon, Nov 03, 2014 at 12:18:10AM +0000, Matt Arsenault wrote:
> Also give a proper error for other address spaces.
> 
> Sort of depends on clang patch to not emit all LDS objects with zeroinitializer anymore.

LGTM.

> 
> http://reviews.llvm.org/D6078
> 
> Files:
>   lib/Target/R600/AMDGPUISelLowering.cpp
>   test/CodeGen/R600/32-bit-local-address-space.ll
>   test/CodeGen/R600/ds_read2.ll
>   test/CodeGen/R600/ds_read2st64.ll
>   test/CodeGen/R600/ds_write2.ll
>   test/CodeGen/R600/ds_write2st64.ll
>   test/CodeGen/R600/global-zero-initializer.ll
>   test/CodeGen/R600/lds-initializer.ll
>   test/CodeGen/R600/lds-output-queue.ll
>   test/CodeGen/R600/lds-size.ll
>   test/CodeGen/R600/lds-zero-initializer.ll
>   test/CodeGen/R600/load.ll
>   test/CodeGen/R600/local-memory-two-objects.ll
>   test/CodeGen/R600/local-memory.ll
>   test/CodeGen/R600/missing-store.ll
>   test/CodeGen/R600/shl_add_ptr.ll

> Index: lib/Target/R600/AMDGPUISelLowering.cpp
> ===================================================================
> --- lib/Target/R600/AMDGPUISelLowering.cpp
> +++ lib/Target/R600/AMDGPUISelLowering.cpp
> @@ -691,6 +691,17 @@
>    llvm_unreachable("Unhandled constant initializer");
>  }
>  
> +static bool hasDefinedInitializer(const GlobalValue *GV) {
> +  const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
> +  if (!GVar || !GVar->hasInitializer())
> +    return false;
> +
> +  if (isa<UndefValue>(GVar->getInitializer()))
> +    return false;
> +
> +  return true;
> +}
> +
>  SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
>                                                   SDValue Op,
>                                                   SelectionDAG &DAG) const {
> @@ -700,13 +711,15 @@
>    const GlobalValue *GV = G->getGlobal();
>  
>    switch (G->getAddressSpace()) {
> -  default: llvm_unreachable("Global Address lowering not implemented for this "
> -                            "address space");
>    case AMDGPUAS::LOCAL_ADDRESS: {
>      // XXX: What does the value of G->getOffset() mean?
>      assert(G->getOffset() == 0 &&
>           "Do not know what to do with an non-zero offset");
>  
> +    // TODO: We could emit code to handle the initialization somewhere.
> +    if (hasDefinedInitializer(GV))
> +      break;
> +
>      unsigned Offset;
>      if (MFI->LocalMemoryObjects.count(GV) == 0) {
>        uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType());
> @@ -760,6 +773,12 @@
>      return DAG.getZExtOrTrunc(InitPtr, SDLoc(Op), ConstPtrVT);
>    }
>    }
> +
> +  const Function &Fn = *DAG.getMachineFunction().getFunction();
> +  DiagnosticInfoUnsupported BadInit(Fn,
> +                                    "initializer for address space");
> +  DAG.getContext()->diagnose(BadInit);
> +  return SDValue();
>  }
>  
>  SDValue AMDGPUTargetLowering::LowerCONCAT_VECTORS(SDValue Op,
> Index: test/CodeGen/R600/32-bit-local-address-space.ll
> ===================================================================
> --- test/CodeGen/R600/32-bit-local-address-space.ll
> +++ test/CodeGen/R600/32-bit-local-address-space.ll
> @@ -78,7 +78,7 @@
>    ret void
>  }
>  
> -@g_lds = addrspace(3) global float zeroinitializer, align 4
> +@g_lds = addrspace(3) global float undef, align 4
>  
>  ; FUNC-LABEL: {{^}}infer_ptr_alignment_global_offset:
>  ; CHECK: V_MOV_B32_e32 [[REG:v[0-9]+]], 0
> @@ -90,8 +90,8 @@
>  }
>  
>  
> -@ptr = addrspace(3) global i32 addrspace(3)* null
> -@dst = addrspace(3) global [16384 x i32] zeroinitializer
> +@ptr = addrspace(3) global i32 addrspace(3)* undef
> +@dst = addrspace(3) global [16384 x i32] undef
>  
>  ; FUNC-LABEL: {{^}}global_ptr:
>  ; CHECK: DS_WRITE_B32
> Index: test/CodeGen/R600/ds_read2.ll
> ===================================================================
> --- test/CodeGen/R600/ds_read2.ll
> +++ test/CodeGen/R600/ds_read2.ll
> @@ -3,8 +3,8 @@
>  ; FIXME: We don't get cases where the address was an SGPR because we
>  ; get a copy to the address register for each one.
>  
> -@lds = addrspace(3) global [512 x float] zeroinitializer, align 4
> -@lds.f64 = addrspace(3) global [512 x double] zeroinitializer, align 8
> +@lds = addrspace(3) global [512 x float] undef, align 4
> +@lds.f64 = addrspace(3) global [512 x double] undef, align 8
>  
>  ; SI-LABEL: @simple_read2_f32
>  ; SI: DS_READ2_B32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:8
> @@ -382,7 +382,7 @@
>    ret void
>  }
>  
> -@foo = addrspace(3) global [4 x i32] zeroinitializer, align 4
> +@foo = addrspace(3) global [4 x i32] undef, align 4
>  
>  ; SI-LABEL: @load_constant_adjacent_offsets
>  ; SI: V_MOV_B32_e32 [[ZERO:v[0-9]+]], 0{{$}}
> @@ -406,7 +406,7 @@
>    ret void
>  }
>  
> -@bar = addrspace(3) global [4 x i64] zeroinitializer, align 4
> +@bar = addrspace(3) global [4 x i64] undef, align 4
>  
>  ; SI-LABEL: @load_misaligned64_constant_offsets
>  ; SI: V_MOV_B32_e32 [[ZERO:v[0-9]+]], 0{{$}}
> @@ -420,7 +420,7 @@
>    ret void
>  }
>  
> -@bar.large = addrspace(3) global [4096 x i64] zeroinitializer, align 4
> +@bar.large = addrspace(3) global [4096 x i64] undef, align 4
>  
>  ; SI-LABEL: @load_misaligned64_constant_large_offsets
>  ; SI-DAG: V_MOV_B32_e32 [[BASE0:v[0-9]+]], 0x7ff8{{$}}
> @@ -436,8 +436,8 @@
>    ret void
>  }
>  
> -@sgemm.lA = internal unnamed_addr addrspace(3) global [264 x float] zeroinitializer, align 4
> -@sgemm.lB = internal unnamed_addr addrspace(3) global [776 x float] zeroinitializer, align 4
> +@sgemm.lA = internal unnamed_addr addrspace(3) global [264 x float] undef, align 4
> +@sgemm.lB = internal unnamed_addr addrspace(3) global [776 x float] undef, align 4
>  
>  define void @sgemm_inner_loop_read2_sequence(float addrspace(1)* %C, i32 %lda, i32 %ldb) #0 {
>    %x.i = tail call i32 @llvm.r600.read.tgid.x() #1
> Index: test/CodeGen/R600/ds_read2st64.ll
> ===================================================================
> --- test/CodeGen/R600/ds_read2st64.ll
> +++ test/CodeGen/R600/ds_read2st64.ll
> @@ -1,7 +1,7 @@
>  ; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck -check-prefix=SI %s
>  
> -@lds = addrspace(3) global [512 x float] zeroinitializer, align 4
> -@lds.f64 = addrspace(3) global [512 x double] zeroinitializer, align 8
> +@lds = addrspace(3) global [512 x float] undef, align 4
> +@lds.f64 = addrspace(3) global [512 x double] undef, align 8
>  
>  
>  ; SI-LABEL: @simple_read2st64_f32_0_1
> Index: test/CodeGen/R600/ds_write2.ll
> ===================================================================
> --- test/CodeGen/R600/ds_write2.ll
> +++ test/CodeGen/R600/ds_write2.ll
> @@ -1,7 +1,7 @@
>  ; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck -strict-whitespace -check-prefix=SI %s
>  
> -@lds = addrspace(3) global [512 x float] zeroinitializer, align 4
> -@lds.f64 = addrspace(3) global [512 x double] zeroinitializer, align 8
> +@lds = addrspace(3) global [512 x float] undef, align 4
> +@lds.f64 = addrspace(3) global [512 x double] undef, align 8
>  
>  
>  ; SI-LABEL: @simple_write2_one_val_f32
> @@ -320,7 +320,7 @@
>    ret void
>  }
>  
> -@foo = addrspace(3) global [4 x i32] zeroinitializer, align 4
> +@foo = addrspace(3) global [4 x i32] undef, align 4
>  
>  ; SI-LABEL: @store_constant_adjacent_offsets
>  ; SI: V_MOV_B32_e32 [[ZERO:v[0-9]+]], 0{{$}}
> @@ -341,7 +341,7 @@
>    ret void
>  }
>  
> -@bar = addrspace(3) global [4 x i64] zeroinitializer, align 4
> +@bar = addrspace(3) global [4 x i64] undef, align 4
>  
>  ; SI-LABEL: @store_misaligned64_constant_offsets
>  ; SI: V_MOV_B32_e32 [[ZERO:v[0-9]+]], 0{{$}}
> @@ -353,7 +353,7 @@
>    ret void
>  }
>  
> -@bar.large = addrspace(3) global [4096 x i64] zeroinitializer, align 4
> +@bar.large = addrspace(3) global [4096 x i64] undef, align 4
>  
>  ; SI-LABEL: @store_misaligned64_constant_large_offsets
>  ; SI-DAG: V_MOV_B32_e32 [[BASE0:v[0-9]+]], 0x7ff8{{$}}
> @@ -367,8 +367,8 @@
>    ret void
>  }
>  
> -@sgemm.lA = internal unnamed_addr addrspace(3) global [264 x float] zeroinitializer, align 4
> -@sgemm.lB = internal unnamed_addr addrspace(3) global [776 x float] zeroinitializer, align 4
> +@sgemm.lA = internal unnamed_addr addrspace(3) global [264 x float] undef, align 4
> +@sgemm.lB = internal unnamed_addr addrspace(3) global [776 x float] undef, align 4
>  
>  define void @write2_sgemm_sequence(float addrspace(1)* %C, i32 %lda, i32 %ldb, float addrspace(1)* %in) #0 {
>    %x.i = tail call i32 @llvm.r600.read.tgid.x() #1
> Index: test/CodeGen/R600/ds_write2st64.ll
> ===================================================================
> --- test/CodeGen/R600/ds_write2st64.ll
> +++ test/CodeGen/R600/ds_write2st64.ll
> @@ -1,7 +1,7 @@
>  ; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck -check-prefix=SI %s
>  
>  
> -@lds = addrspace(3) global [512 x float] zeroinitializer, align 4
> +@lds = addrspace(3) global [512 x float] undef, align 4
>  
>  
>  ; SI-LABEL: @simple_write2st64_one_val_f32_0_1
> Index: test/CodeGen/R600/global-zero-initializer.ll
> ===================================================================
> --- /dev/null
> +++ test/CodeGen/R600/global-zero-initializer.ll
> @@ -0,0 +1,12 @@
> +; RUN: not llc -march=r600 -mcpu=SI < %s 2>&1 | FileCheck %s
> +
> +; CHECK: error: unsupported initializer for address space in load_init_global_global
> +
> +@lds = addrspace(1) global [256 x i32] zeroinitializer
> +
> +define void @load_init_global_global(i32 addrspace(1)* %out, i1 %p) {
> + %gep = getelementptr [256 x i32] addrspace(1)* @lds, i32 0, i32 10
> +  %ld = load i32 addrspace(1)* %gep
> +  store i32 %ld, i32 addrspace(1)* %out
> +  ret void
> +}
> Index: test/CodeGen/R600/lds-initializer.ll
> ===================================================================
> --- /dev/null
> +++ test/CodeGen/R600/lds-initializer.ll
> @@ -0,0 +1,12 @@
> +; RUN: not llc -march=r600 -mcpu=SI < %s 2>&1 | FileCheck %s
> +
> +; CHECK: error: unsupported initializer for address space in load_init_lds_global
> +
> +@lds = addrspace(3) global [8 x i32] [i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8]
> +
> +define void @load_init_lds_global(i32 addrspace(1)* %out, i1 %p) {
> + %gep = getelementptr [8 x i32] addrspace(3)* @lds, i32 0, i32 10
> +  %ld = load i32 addrspace(3)* %gep
> +  store i32 %ld, i32 addrspace(1)* %out
> +  ret void
> +}
> Index: test/CodeGen/R600/lds-output-queue.ll
> ===================================================================
> --- test/CodeGen/R600/lds-output-queue.ll
> +++ test/CodeGen/R600/lds-output-queue.ll
> @@ -8,7 +8,7 @@
>  ; CHECK-NOT: ALU clause
>  ; CHECK: MOV * T{{[0-9]\.[XYZW]}}, OQAP
>  
> -@local_mem = internal unnamed_addr addrspace(3) global [2 x i32] [i32 1, i32 2], align 4
> +@local_mem = internal unnamed_addr addrspace(3) global [2 x i32] undef, align 4
>  
>  define void @lds_input_queue(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %index) {
>  entry:
> Index: test/CodeGen/R600/lds-size.ll
> ===================================================================
> --- test/CodeGen/R600/lds-size.ll
> +++ test/CodeGen/R600/lds-size.ll
> @@ -6,7 +6,7 @@
>  ; CHECK-LABEL: {{^}}test:
>  ; CHECK: .long   166120
>  ; CHECK-NEXT: .long   1
> -@lds = internal unnamed_addr addrspace(3) global i32 zeroinitializer, align 4
> +@lds = internal unnamed_addr addrspace(3) global i32 undef, align 4
>  
>  define void @test(i32 addrspace(1)* %out, i32 %cond) {
>  entry:
> Index: test/CodeGen/R600/lds-zero-initializer.ll
> ===================================================================
> --- /dev/null
> +++ test/CodeGen/R600/lds-zero-initializer.ll
> @@ -0,0 +1,12 @@
> +; RUN: not llc -march=r600 -mcpu=SI < %s 2>&1 | FileCheck %s
> +
> +; CHECK: error: unsupported initializer for address space in load_zeroinit_lds_global
> +
> +@lds = addrspace(3) global [256 x i32] zeroinitializer
> +
> +define void @load_zeroinit_lds_global(i32 addrspace(1)* %out, i1 %p) {
> + %gep = getelementptr [256 x i32] addrspace(3)* @lds, i32 0, i32 10
> +  %ld = load i32 addrspace(3)* %gep
> +  store i32 %ld, i32 addrspace(1)* %out
> +  ret void
> +}
> Index: test/CodeGen/R600/load.ll
> ===================================================================
> --- test/CodeGen/R600/load.ll
> +++ test/CodeGen/R600/load.ll
> @@ -721,7 +721,7 @@
>  }
>  
>  
> -@lds = addrspace(3) global [512 x i32] zeroinitializer, align 4
> +@lds = addrspace(3) global [512 x i32] undef, align 4
>  
>  ; On SI we need to make sure that the base offset is a register and not
>  ; an immediate.
> Index: test/CodeGen/R600/local-memory-two-objects.ll
> ===================================================================
> --- test/CodeGen/R600/local-memory-two-objects.ll
> +++ test/CodeGen/R600/local-memory-two-objects.ll
> @@ -2,8 +2,8 @@
>  ; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK --check-prefix=SI %s
>  ; RUN: llc < %s -march=r600 -mcpu=bonaire -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK --check-prefix=CI %s
>  
> -@local_memory_two_objects.local_mem0 = internal unnamed_addr addrspace(3) global [4 x i32] zeroinitializer, align 4
> -@local_memory_two_objects.local_mem1 = internal unnamed_addr addrspace(3) global [4 x i32] zeroinitializer, align 4
> +@local_memory_two_objects.local_mem0 = internal unnamed_addr addrspace(3) global [4 x i32] undef, align 4
> +@local_memory_two_objects.local_mem1 = internal unnamed_addr addrspace(3) global [4 x i32] undef, align 4
>  
>  ; EG-CHECK: {{^}}local_memory_two_objects:
>  
> Index: test/CodeGen/R600/local-memory.ll
> ===================================================================
> --- test/CodeGen/R600/local-memory.ll
> +++ test/CodeGen/R600/local-memory.ll
> @@ -2,7 +2,7 @@
>  ; RUN: llc -march=r600 -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
>  ; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
>  
> -@local_memory.local_mem = internal unnamed_addr addrspace(3) global [128 x i32] zeroinitializer, align 4
> +@local_memory.local_mem = internal unnamed_addr addrspace(3) global [128 x i32] undef, align 4
>  
>  ; FUNC-LABEL: {{^}}local_memory:
>  
> Index: test/CodeGen/R600/missing-store.ll
> ===================================================================
> --- test/CodeGen/R600/missing-store.ll
> +++ test/CodeGen/R600/missing-store.ll
> @@ -1,6 +1,6 @@
>  ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=SI %s
>  
> -@ptr_load = addrspace(3) global i32 addrspace(2)* null, align 8
> +@ptr_load = addrspace(3) global i32 addrspace(2)* undef, align 8
>  
>  ; Make sure when the load from %ptr2 is folded the chain isn't lost,
>  ; resulting in losing the store to gptr
> Index: test/CodeGen/R600/shl_add_ptr.ll
> ===================================================================
> --- test/CodeGen/R600/shl_add_ptr.ll
> +++ test/CodeGen/R600/shl_add_ptr.ll
> @@ -8,8 +8,8 @@
>  
>  declare i32 @llvm.r600.read.tidig.x() #1
>  
> -@lds0 = addrspace(3) global [512 x float] zeroinitializer, align 4
> -@lds1 = addrspace(3) global [512 x float] zeroinitializer, align 4
> +@lds0 = addrspace(3) global [512 x float] undef, align 4
> +@lds1 = addrspace(3) global [512 x float] undef, align 4
>  
>  
>  ; Make sure the (add tid, 2) << 2 gets folded into the ds's offset as (tid << 2) + 8
> @@ -49,7 +49,7 @@
>    ret void
>  }
>  
> -@maxlds = addrspace(3) global [65536 x i8] zeroinitializer, align 4
> +@maxlds = addrspace(3) global [65536 x i8] undef, align 4
>  
>  ; SI-LABEL: {{^}}load_shl_base_lds_max_offset
>  ; SI: DS_READ_U8 v{{[0-9]+}}, v{{[0-9]+}} offset:65535
> @@ -100,7 +100,7 @@
>  ; --------------------------------------------------------------------------------
>  ; Atomics.
>  
> -@lds2 = addrspace(3) global [512 x i32] zeroinitializer, align 4
> +@lds2 = addrspace(3) global [512 x i32] undef, align 4
>  
>  ; define void @atomic_load_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
>  ;   %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1

> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits




More information about the llvm-commits mailing list