[llvm] r293580 - AMDGPU: Implement hook for InferAddressSpaces
Michael Kuperstein via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 31 14:50:16 PST 2017
Hi Matt,
I'm getting a warning with gcc 4.8 on this:
../lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h:101:66: warning: enumeral
and non-enumeral type in conditional expression [enabled by default]
return ST->hasFlatAddressSpace() ? AMDGPUAS::FLAT_ADDRESS : -1;
Thanks,
Michael
On Mon, Jan 30, 2017 at 5:20 PM, Matt Arsenault via llvm-commits <
llvm-commits at lists.llvm.org> wrote:
> Author: arsenm
> Date: Mon Jan 30 19:20:54 2017
> New Revision: 293580
>
> URL: http://llvm.org/viewvc/llvm-project?rev=293580&view=rev
> Log:
> AMDGPU: Implement hook for InferAddressSpaces
>
> For now just port some of the existing NVPTX tests
> and from an old HSAIL optimization pass which
> approximately did the same thing.
>
> Don't enable the pass yet until more testing is done.
>
> Added:
> llvm/trunk/test/Transforms/InferAddressSpaces/
> llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/
> llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/basic.ll
> llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/
> infer-address-space.ll
> llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/old-
> pass-regressions.ll
> llvm/trunk/test/Transforms/InferAddressSpaces/lit.local.cfg
> Modified:
> llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
> llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
>
> Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/
> AMDGPU/AMDGPUSubtarget.h?rev=293580&r1=293579&r2=293580&view=diff
> ============================================================
> ==================
> --- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h (original)
> +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h Mon Jan 30 19:20:54
> 2017
> @@ -312,6 +312,10 @@ public:
> return EnableXNACK;
> }
>
> + bool hasFlatAddressSpace() const {
> + return FlatAddressSpace;
> + }
> +
> bool isMesaKernel(const MachineFunction &MF) const {
> return isMesa3DOS() && !AMDGPU::isShader(MF.
> getFunction()->getCallingConv());
> }
> @@ -554,10 +558,6 @@ public:
> return 16;
> }
>
> - bool hasFlatAddressSpace() const {
> - return FlatAddressSpace;
> - }
> -
> bool hasSMemRealTime() const {
> return HasSMemRealTime;
> }
>
> Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/
> AMDGPUTargetTransformInfo.h?rev=293580&r1=293579&r2=293580&view=diff
> ============================================================
> ==================
> --- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h (original)
> +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h Mon Jan 30
> 19:20:54 2017
> @@ -32,6 +32,7 @@ class AMDGPUTTIImpl final : public Basic
>
> const AMDGPUSubtarget *ST;
> const AMDGPUTargetLowering *TLI;
> + bool IsGraphicsShader;
>
> const AMDGPUSubtarget *getST() const { return ST; }
> const AMDGPUTargetLowering *getTLI() const { return TLI; }
> @@ -62,7 +63,8 @@ public:
> explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
> : BaseT(TM, F.getParent()->getDataLayout()),
> ST(TM->getSubtargetImpl(F)),
> - TLI(ST->getTargetLowering()) {}
> + TLI(ST->getTargetLowering()),
> + IsGraphicsShader(AMDGPU::isShader(F.getCallingConv())) {}
>
> bool hasBranchDivergence() { return true; }
>
> @@ -91,6 +93,14 @@ public:
> int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
> bool isSourceOfDivergence(const Value *V) const;
>
> + unsigned getFlatAddressSpace() const {
> + // Don't bother running InferAddressSpaces pass on graphics shaders
> which
> + // don't use flat addressing.
> + if (IsGraphicsShader)
> + return -1;
> + return ST->hasFlatAddressSpace() ? AMDGPUAS::FLAT_ADDRESS : -1;
> + }
> +
> unsigned getVectorSplitCost() { return 0; }
> };
>
>
> Added: llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/basic.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/
> Transforms/InferAddressSpaces/AMDGPU/basic.ll?rev=293580&view=auto
> ============================================================
> ==================
> --- llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/basic.ll (added)
> +++ llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/basic.ll Mon Jan
> 30 19:20:54 2017
> @@ -0,0 +1,131 @@
> +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -infer-address-spaces %s |
> FileCheck %s
> +
> +; Trivial optimization of generic addressing
> +
> +; CHECK-LABEL: @load_global_from_flat(
> +; CHECK-NEXT: %tmp0 = addrspacecast float addrspace(4)* %generic_scalar
> to float addrspace(1)*
> +; CHECK-NEXT: %tmp1 = load float, float addrspace(1)* %tmp0
> +; CHECK-NEXT: ret float %tmp1
> +define float @load_global_from_flat(float addrspace(4)* %generic_scalar)
> #0 {
> + %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float
> addrspace(1)*
> + %tmp1 = load float, float addrspace(1)* %tmp0
> + ret float %tmp1
> +}
> +
> +; CHECK-LABEL: @load_constant_from_flat(
> +; CHECK-NEXT: %tmp0 = addrspacecast float addrspace(4)* %generic_scalar
> to float addrspace(2)*
> +; CHECK-NEXT: %tmp1 = load float, float addrspace(2)* %tmp0
> +; CHECK-NEXT: ret float %tmp1
> +define float @load_constant_from_flat(float addrspace(4)*
> %generic_scalar) #0 {
> + %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float
> addrspace(2)*
> + %tmp1 = load float, float addrspace(2)* %tmp0
> + ret float %tmp1
> +}
> +
> +; CHECK-LABEL: @load_group_from_flat(
> +; CHECK-NEXT: %tmp0 = addrspacecast float addrspace(4)* %generic_scalar
> to float addrspace(3)*
> +; CHECK-NEXT: %tmp1 = load float, float addrspace(3)* %tmp0
> +; CHECK-NEXT: ret float %tmp1
> +define float @load_group_from_flat(float addrspace(4)* %generic_scalar)
> #0 {
> + %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float
> addrspace(3)*
> + %tmp1 = load float, float addrspace(3)* %tmp0
> + ret float %tmp1
> +}
> +
> +; CHECK-LABEL: @load_private_from_flat(
> +; CHECK-NEXT: %tmp0 = addrspacecast float addrspace(4)* %generic_scalar
> to float*
> +; CHECK-NEXT: %tmp1 = load float, float* %tmp0
> +; CHECK-NEXT: ret float %tmp1
> +define float @load_private_from_flat(float addrspace(4)* %generic_scalar)
> #0 {
> + %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float*
> + %tmp1 = load float, float* %tmp0
> + ret float %tmp1
> +}
> +
> +; CHECK-LABEL: @store_global_from_flat(
> +; CHECK-NEXT: %tmp0 = addrspacecast float addrspace(4)* %generic_scalar
> to float addrspace(1)*
> +; CHECK-NEXT: store float 0.000000e+00, float addrspace(1)* %tmp0
> +define void @store_global_from_flat(float addrspace(4)* %generic_scalar)
> #0 {
> + %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float
> addrspace(1)*
> + store float 0.0, float addrspace(1)* %tmp0
> + ret void
> +}
> +
> +; CHECK-LABEL: @store_group_from_flat(
> +; CHECK-NEXT: %tmp0 = addrspacecast float addrspace(4)* %generic_scalar
> to float addrspace(3)*
> +; CHECK-NEXT: store float 0.000000e+00, float addrspace(3)* %tmp0
> +define void @store_group_from_flat(float addrspace(4)* %generic_scalar)
> #0 {
> + %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float
> addrspace(3)*
> + store float 0.0, float addrspace(3)* %tmp0
> + ret void
> +}
> +
> +; CHECK-LABEL: @store_private_from_flat(
> +; CHECK-NEXT: %tmp0 = addrspacecast float addrspace(4)* %generic_scalar
> to float*
> +; CHECK-NEXT: store float 0.000000e+00, float* %tmp0
> +define void @store_private_from_flat(float addrspace(4)* %generic_scalar)
> #0 {
> + %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float*
> + store float 0.0, float* %tmp0
> + ret void
> +}
> +
> +; optimized to global load/store.
> +; CHECK-LABEL: @load_store_global(
> +; CHECK-NEXT: %val = load i32, i32 addrspace(1)* %input, align 4
> +; CHECK-NEXT: store i32 %val, i32 addrspace(1)* %output, align 4
> +; CHECK-NEXT: ret void
> +define void @load_store_global(i32 addrspace(1)* nocapture %input, i32
> addrspace(1)* nocapture %output) #0 {
> + %tmp0 = addrspacecast i32 addrspace(1)* %input to i32 addrspace(4)*
> + %tmp1 = addrspacecast i32 addrspace(1)* %output to i32 addrspace(4)*
> + %val = load i32, i32 addrspace(4)* %tmp0, align 4
> + store i32 %val, i32 addrspace(4)* %tmp1, align 4
> + ret void
> +}
> +
> +; Optimized to group load/store.
> +; CHECK-LABEL: @load_store_group(
> +; CHECK-NEXT: %val = load i32, i32 addrspace(3)* %input, align 4
> +; CHECK-NEXT: store i32 %val, i32 addrspace(3)* %output, align 4
> +; CHECK-NEXT: ret void
> +define void @load_store_group(i32 addrspace(3)* nocapture %input, i32
> addrspace(3)* nocapture %output) #0 {
> + %tmp0 = addrspacecast i32 addrspace(3)* %input to i32 addrspace(4)*
> + %tmp1 = addrspacecast i32 addrspace(3)* %output to i32 addrspace(4)*
> + %val = load i32, i32 addrspace(4)* %tmp0, align 4
> + store i32 %val, i32 addrspace(4)* %tmp1, align 4
> + ret void
> +}
> +
> +; Optimized to private load/store.
> +; CHECK-LABEL: @load_store_private(
> +; CHECK-NEXT: %val = load i32, i32* %input, align 4
> +; CHECK-NEXT: store i32 %val, i32* %output, align 4
> +; CHECK-NEXT: ret void
> +define void @load_store_private(i32* nocapture %input, i32* nocapture
> %output) #0 {
> + %tmp0 = addrspacecast i32* %input to i32 addrspace(4)*
> + %tmp1 = addrspacecast i32* %output to i32 addrspace(4)*
> + %val = load i32, i32 addrspace(4)* %tmp0, align 4
> + store i32 %val, i32 addrspace(4)* %tmp1, align 4
> + ret void
> +}
> +
> +; No optimization. flat load/store.
> +; CHECK-LABEL: @load_store_flat(
> +; CHECK-NEXT: %val = load i32, i32 addrspace(4)* %input, align 4
> +; CHECK-NEXT: store i32 %val, i32 addrspace(4)* %output, align 4
> +; CHECK-NEXT: ret void
> +define void @load_store_flat(i32 addrspace(4)* nocapture %input, i32
> addrspace(4)* nocapture %output) #0 {
> + %val = load i32, i32 addrspace(4)* %input, align 4
> + store i32 %val, i32 addrspace(4)* %output, align 4
> + ret void
> +}
> +
> +; CHECK-LABEL: @store_addrspacecast_ptr_value(
> +; CHECK: %cast = addrspacecast i32 addrspace(1)* %input to i32
> addrspace(4)*
> +; CHECK-NEXT: store i32 addrspace(4)* %cast, i32 addrspace(4)*
> addrspace(1)* %output, align 4
> +define void @store_addrspacecast_ptr_value(i32 addrspace(1)* nocapture
> %input, i32 addrspace(4)* addrspace(1)* nocapture %output) #0 {
> + %cast = addrspacecast i32 addrspace(1)* %input to i32 addrspace(4)*
> + store i32 addrspace(4)* %cast, i32 addrspace(4)* addrspace(1)* %output,
> align 4
> + ret void
> +}
> +
> +attributes #0 = { nounwind }
>
> Added: llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/
> infer-address-space.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/
> Transforms/InferAddressSpaces/AMDGPU/infer-address-space.ll?
> rev=293580&view=auto
> ============================================================
> ==================
> --- llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/infer-address-space.ll
> (added)
> +++ llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/infer-address-space.ll
> Mon Jan 30 19:20:54 2017
> @@ -0,0 +1,176 @@
> +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -infer-address-spaces %s |
> FileCheck %s
> +; Ports of most of test/CodeGen/NVPTX/access-non-generic.ll
> +
> +@scalar = internal addrspace(3) global float 0.0, align 4
> +@array = internal addrspace(3) global [10 x float] zeroinitializer, align
> 4
> +
> +; CHECK-LABEL: @load_store_lds_f32(
> +; CHECK: %tmp = load float, float addrspace(3)* @scalar, align 4
> +; CHECK: call void @use(float %tmp)
> +; CHECK: store float %v, float addrspace(3)* @scalar, align 4
> +; CHECK: call void @llvm.amdgcn.s.barrier()
> +; CHECK: %tmp2 = load float, float addrspace(3)* @scalar, align 4
> +; CHECK: call void @use(float %tmp2)
> +; CHECK: store float %v, float addrspace(3)* @scalar, align 4
> +; CHECK: call void @llvm.amdgcn.s.barrier()
> +; CHECK: %tmp3 = load float, float addrspace(3)* getelementptr inbounds
> ([10 x float], [10 x float] addrspace(3)* @array, i32 0, i32 5), align 4
> +; CHECK: call void @use(float %tmp3)
> +; CHECK: store float %v, float addrspace(3)* getelementptr inbounds ([10
> x float], [10 x float] addrspace(3)* @array, i32 0, i32 5), align 4
> +; CHECK: call void @llvm.amdgcn.s.barrier()
> +; CHECK: %tmp4 = getelementptr inbounds [10 x float], [10 x float]
> addrspace(3)* @array, i32 0, i32 5
> +; CHECK: %tmp5 = load float, float addrspace(3)* %tmp4, align 4
> +; CHECK: call void @use(float %tmp5)
> +; CHECK: store float %v, float addrspace(3)* %tmp4, align 4
> +; CHECK: call void @llvm.amdgcn.s.barrier()
> +; CHECK: %tmp7 = getelementptr inbounds [10 x float], [10 x float]
> addrspace(3)* @array, i32 0, i32 %i
> +; CHECK: %tmp8 = load float, float addrspace(3)* %tmp7, align 4
> +; CHECK: call void @use(float %tmp8)
> +; CHECK: store float %v, float addrspace(3)* %tmp7, align 4
> +; CHECK: call void @llvm.amdgcn.s.barrier()
> +; CHECK: ret void
> +define void @load_store_lds_f32(i32 %i, float %v) #0 {
> +bb:
> + %tmp = load float, float addrspace(4)* addrspacecast (float
> addrspace(3)* @scalar to float addrspace(4)*), align 4
> + call void @use(float %tmp)
> + store float %v, float addrspace(4)* addrspacecast (float addrspace(3)*
> @scalar to float addrspace(4)*), align 4
> + call void @llvm.amdgcn.s.barrier()
> + %tmp1 = addrspacecast float addrspace(3)* @scalar to float addrspace(4)*
> + %tmp2 = load float, float addrspace(4)* %tmp1, align 4
> + call void @use(float %tmp2)
> + store float %v, float addrspace(4)* %tmp1, align 4
> + call void @llvm.amdgcn.s.barrier()
> + %tmp3 = load float, float addrspace(4)* getelementptr inbounds ([10 x
> float], [10 x float] addrspace(4)* addrspacecast ([10 x float]
> addrspace(3)* @array to [10 x float] addrspace(4)*), i32 0, i32 5), align 4
> + call void @use(float %tmp3)
> + store float %v, float addrspace(4)* getelementptr inbounds ([10 x
> float], [10 x float] addrspace(4)* addrspacecast ([10 x float]
> addrspace(3)* @array to [10 x float] addrspace(4)*), i32 0, i32 5), align 4
> + call void @llvm.amdgcn.s.barrier()
> + %tmp4 = getelementptr inbounds [10 x float], [10 x float] addrspace(4)*
> addrspacecast ([10 x float] addrspace(3)* @array to [10 x float]
> addrspace(4)*), i32 0, i32 5
> + %tmp5 = load float, float addrspace(4)* %tmp4, align 4
> + call void @use(float %tmp5)
> + store float %v, float addrspace(4)* %tmp4, align 4
> + call void @llvm.amdgcn.s.barrier()
> + %tmp6 = addrspacecast [10 x float] addrspace(3)* @array to [10 x float]
> addrspace(4)*
> + %tmp7 = getelementptr inbounds [10 x float], [10 x float] addrspace(4)*
> %tmp6, i32 0, i32 %i
> + %tmp8 = load float, float addrspace(4)* %tmp7, align 4
> + call void @use(float %tmp8)
> + store float %v, float addrspace(4)* %tmp7, align 4
> + call void @llvm.amdgcn.s.barrier()
> + ret void
> +}
> +
> +; CHECK-LABEL: @constexpr_load_int_from_float_lds(
> +; CHECK: %tmp = load i32, i32 addrspace(3)* bitcast (float addrspace(3)*
> @scalar to i32 addrspace(3)*), align 4
> +define i32 @constexpr_load_int_from_float_lds() #0 {
> +bb:
> + %tmp = load i32, i32 addrspace(4)* addrspacecast (i32 addrspace(3)*
> bitcast (float addrspace(3)* @scalar to i32 addrspace(3)*) to i32
> addrspace(4)*), align 4
> + ret i32 %tmp
> +}
> +
> +; CHECK-LABEL: @load_int_from_global_float(
> +; CHECK: %tmp1 = getelementptr float, float addrspace(1)* %input, i32 %i
> +; CHECK: %tmp2 = getelementptr float, float addrspace(1)* %tmp1, i32 %j
> +; CHECK: %tmp3 = bitcast float addrspace(1)* %tmp2 to i32 addrspace(1)*
> +; CHECK: %tmp4 = load i32, i32 addrspace(1)* %tmp3
> +; CHECK: ret i32 %tmp4
> +define i32 @load_int_from_global_float(float addrspace(1)* %input, i32
> %i, i32 %j) #0 {
> +bb:
> + %tmp = addrspacecast float addrspace(1)* %input to float addrspace(4)*
> + %tmp1 = getelementptr float, float addrspace(4)* %tmp, i32 %i
> + %tmp2 = getelementptr float, float addrspace(4)* %tmp1, i32 %j
> + %tmp3 = bitcast float addrspace(4)* %tmp2 to i32 addrspace(4)*
> + %tmp4 = load i32, i32 addrspace(4)* %tmp3
> + ret i32 %tmp4
> +}
> +
> +; CHECK-LABEL: @nested_const_expr(
> +; CHECK: store i32 1, i32 addrspace(3)* bitcast (float addrspace(3)*
> getelementptr inbounds ([10 x float], [10 x float] addrspace(3)* @array,
> i64 0, i64 1) to i32 addrspace(3)*), align 4
> +define void @nested_const_expr() #0 {
> + store i32 1, i32 addrspace(4)* bitcast (float addrspace(4)*
> getelementptr ([10 x float], [10 x float] addrspace(4)* addrspacecast ([10
> x float] addrspace(3)* @array to [10 x float] addrspace(4)*), i64 0, i64 1)
> to i32 addrspace(4)*), align 4
> + ret void
> +}
> +
> +; CHECK-LABEL: @rauw(
> +; CHECK: %addr = getelementptr float, float addrspace(1)* %input, i64 10
> +; CHECK-NEXT: %v = load float, float addrspace(1)* %addr
> +; CHECK-NEXT: store float %v, float addrspace(1)* %addr
> +; CHECK-NEXT: ret void
> +define void @rauw(float addrspace(1)* %input) #0 {
> +bb:
> + %generic_input = addrspacecast float addrspace(1)* %input to float
> addrspace(4)*
> + %addr = getelementptr float, float addrspace(4)* %generic_input, i64 10
> + %v = load float, float addrspace(4)* %addr
> + store float %v, float addrspace(4)* %addr
> + ret void
> +}
> +
> +; FIXME: Should be able to eliminate the cast inside the loop
> +; CHECK-LABEL: @loop(
> +
> +; CHECK: %p = bitcast [10 x float] addrspace(3)* @array to float
> addrspace(3)*
> +; CHECK: %0 = addrspacecast float addrspace(3)* %p to float addrspace(4)*
> +; CHECK: %end = getelementptr float, float addrspace(4)* %0, i64 10
> +; CHECK: br label %loop
> +
> +; CHECK: loop: ; preds =
> %loop, %entry
> +; CHECK: %i = phi float addrspace(3)* [ %p, %entry ], [ %i2, %loop ]
> +; CHECK: %v = load float, float addrspace(3)* %i
> +; CHECK: call void @use(float %v)
> +; CHECK: %i2 = getelementptr float, float addrspace(3)* %i, i64 1
> +; CHECK: %1 = addrspacecast float addrspace(3)* %i2 to float addrspace(4)*
> +; CHECK: %exit_cond = icmp eq float addrspace(4)* %1, %end
> +; CHECK: br i1 %exit_cond, label %exit, label %loop
> +define void @loop() #0 {
> +entry:
> + %p = addrspacecast [10 x float] addrspace(3)* @array to float
> addrspace(4)*
> + %end = getelementptr float, float addrspace(4)* %p, i64 10
> + br label %loop
> +
> +loop: ; preds = %loop, %entry
> + %i = phi float addrspace(4)* [ %p, %entry ], [ %i2, %loop ]
> + %v = load float, float addrspace(4)* %i
> + call void @use(float %v)
> + %i2 = getelementptr float, float addrspace(4)* %i, i64 1
> + %exit_cond = icmp eq float addrspace(4)* %i2, %end
> + br i1 %exit_cond, label %exit, label %loop
> +
> +exit: ; preds = %loop
> + ret void
> +}
> +
> +@generic_end = external addrspace(1) global float addrspace(4)*
> +
> +; CHECK-LABEL: @loop_with_generic_bound(
> +; CHECK: %p = bitcast [10 x float] addrspace(3)* @array to float
> addrspace(3)*
> +; CHECK: %end = load float addrspace(4)*, float addrspace(4)*
> addrspace(1)* @generic_end
> +; CHECK: br label %loop
> +
> +; CHECK: loop:
> +; CHECK: %i = phi float addrspace(3)* [ %p, %entry ], [ %i2, %loop ]
> +; CHECK: %v = load float, float addrspace(3)* %i
> +; CHECK: call void @use(float %v)
> +; CHECK: %i2 = getelementptr float, float addrspace(3)* %i, i64 1
> +; CHECK: %0 = addrspacecast float addrspace(3)* %i2 to float addrspace(4)*
> +; CHECK: %exit_cond = icmp eq float addrspace(4)* %0, %end
> +; CHECK: br i1 %exit_cond, label %exit, label %loop
> +define void @loop_with_generic_bound() #0 {
> +entry:
> + %p = addrspacecast [10 x float] addrspace(3)* @array to float
> addrspace(4)*
> + %end = load float addrspace(4)*, float addrspace(4)* addrspace(1)*
> @generic_end
> + br label %loop
> +
> +loop: ; preds = %loop, %entry
> + %i = phi float addrspace(4)* [ %p, %entry ], [ %i2, %loop ]
> + %v = load float, float addrspace(4)* %i
> + call void @use(float %v)
> + %i2 = getelementptr float, float addrspace(4)* %i, i64 1
> + %exit_cond = icmp eq float addrspace(4)* %i2, %end
> + br i1 %exit_cond, label %exit, label %loop
> +
> +exit: ; preds = %loop
> + ret void
> +}
> +
> +declare void @llvm.amdgcn.s.barrier() #1
> +declare void @use(float) #0
> +
> +attributes #0 = { nounwind }
> +attributes #1 = { convergent nounwind }
>
> Added: llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/old-
> pass-regressions.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/
> Transforms/InferAddressSpaces/AMDGPU/old-pass-regressions.
> ll?rev=293580&view=auto
> ============================================================
> ==================
> --- llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/old-pass-regressions.ll
> (added)
> +++ llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/old-pass-regressions.ll
> Mon Jan 30 19:20:54 2017
> @@ -0,0 +1,143 @@
> +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -infer-address-spaces %s |
> FileCheck %s
> +
> +; Regression tests from old HSAIL addrspacecast optimization pass
> +
> +@data = internal addrspace(1) global [100 x double] [double 0.00, double
> 1.000000e-01, double 2.000000e-01, double 3.000000e-01, double
> 4.000000e-01, double 5.000000e-01, double 6.000000e-01, double
> 7.000000e-01, double 8.000000e-01, double 9.000000e-01, double 1.00, double
> 1.10, double 1.20, double 1.30, double 1.40, double 1.50, double 1.60,
> double 1.70, double 1.80, double 1.90, double 2.00, double 2.10, double
> 2.20, double 2.30, double 2.40, double 2.50, double 2.60, double 2.70,
> double 2.80, double 2.90, double 3.00, double 3.10, double 3.20, double
> 3.30, double 3.40, double 3.50, double 3.60, double 3.70, double 3.80,
> double 3.90, double 4.00, double 4.10, double 4.20, double 4.30, double
> 4.40, double 4.50, double 4.60, double 4.70, double 4.80, double 4.90,
> double 5.00, double 5.10, double 5.20, double 5.30, double 5.40, double
> 5.50, double 5.60, double 5.70, double 5.80, double 5.90, double 6.00,
> double 6.10, double 6.20, double 6.30, double 6.40, double 6.50, double
> 6.60, double 6.70, double 6.80, double 6.90, double 7.00, double 7.10, double
> 7.20, double 7.30, double 7.40, double 7.50, double 7.60, double 7.70,
> double 7.80, double 7.90, double 8.00, double 8.10, double 8.20, double
> 8.30, double 8.40, double 8.50, double 8.60, double 8.70, double 8.80,
> double 8.90, double 9.00, double 9.10, double 9.20, double 9.30, double
> 9.40, double 9.50, double 9.60, double 9.70, double 9.80, double 9.90],
> align 8
> +
> +
> +; Should generate flat load
> +
> +; CHECK-LABEL: @generic_address_bitcast_const(
> +; CHECK: %vecload1 = load <2 x double>, <2 x double> addrspace(1)*
> bitcast (double addrspace(1)* getelementptr inbounds ([100 x double], [100
> x double] addrspace(1)* @data, i64 0, i64 4) to <2 x double>
> addrspace(1)*), align 8
> +define void @generic_address_bitcast_const(i64 %arg0, i32 addrspace(1)*
> nocapture %results) #0 {
> +entry:
> + %tmp1 = call i32 @llvm.amdgcn.workitem.id.x()
> + %tmp2 = zext i32 %tmp1 to i64
> + %tmp3 = add i64 %tmp2, %arg0
> + %vecload1 = load <2 x double>, <2 x double> addrspace(4)* bitcast
> (double addrspace(4)* getelementptr ([100 x double], [100 x double]
> addrspace(4)* addrspacecast ([100 x double] addrspace(1)* @data to [100 x
> double] addrspace(4)*), i64 0, i64 4) to <2 x double> addrspace(4)*), align
> 8
> + %cmp = fcmp ord <2 x double> %vecload1, zeroinitializer
> + %sext = sext <2 x i1> %cmp to <2 x i64>
> + %tmp4 = extractelement <2 x i64> %sext, i64 0
> + %tmp5 = extractelement <2 x i64> %sext, i64 1
> + %tmp6 = and i64 %tmp4, %tmp5
> + %tmp7 = lshr i64 %tmp6, 63
> + %tmp8 = trunc i64 %tmp7 to i32
> + %idxprom = and i64 %tmp3, 4294967295
> + %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %results, i64
> %idxprom
> + store i32 %tmp8, i32 addrspace(1)* %arrayidx, align 4
> + ret void
> +}
> +
> +@generic_address_bug9749.val = internal addrspace(1) global float 0.0,
> align 4
> +
> +declare i32 @_Z9get_fencePU3AS4v(i8 addrspace(4)*)
> +%opencl.pipe_t = type opaque
> +
> +; This is a compile time assert bug, but we still want to check
> optimization
> +; is performed to generate ld_global.
> +; CHECK-LABEL: @generic_address_pipe_bug9673(
> +; CHECK: %tmp1 = bitcast %opencl.pipe_t addrspace(3)* %in_pipe to i32
> addrspace(3)*
> +; CHECK: %add.ptr = getelementptr inbounds i32, i32 addrspace(3)* %tmp1,
> i32 2
> +; CHECK: %tmp2 = load i32, i32 addrspace(3)* %add.ptr, align 4
> +define void @generic_address_pipe_bug9673(%opencl.pipe_t addrspace(3)*
> nocapture %in_pipe, i32 addrspace(1)* nocapture %dst) #0 {
> +entry:
> + %tmp = call i32 @llvm.amdgcn.workitem.id.x()
> + %tmp1 = bitcast %opencl.pipe_t addrspace(3)* %in_pipe to i32
> addrspace(3)*
> + %add.ptr = getelementptr inbounds i32, i32 addrspace(3)* %tmp1, i32 2
> + %tmp2 = load i32, i32 addrspace(3)* %add.ptr, align 4
> + %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %dst, i32 %tmp
> + store i32 %tmp2, i32 addrspace(1)* %arrayidx, align 4
> + ret void
> +}
> +
> +; Should generate flat load
> +; CHECK-LABEL: @generic_address_bug9749(
> +; CHECK: br i1
> +; CHECK: load float, float addrspace(4)*
> +; CHECK: br label
> +define void @generic_address_bug9749(i32 addrspace(1)* nocapture
> %results) #0 {
> +entry:
> + %ptr = alloca float addrspace(4)*, align 8
> + %tmp = call i32 @llvm.amdgcn.workitem.id.x()
> + %tmp1 = zext i32 %tmp to i64
> + store float 0x3FB99999A0000000, float addrspace(1)*
> @generic_address_bug9749.val, align 4
> + store volatile float addrspace(4)* addrspacecast (float addrspace(1)*
> @generic_address_bug9749.val to float addrspace(4)*), float addrspace(4)**
> %ptr, align 8
> + %tmp2 = load volatile float addrspace(4)*, float addrspace(4)** %ptr,
> align 8
> + %tmp3 = load float, float addrspace(1)* @generic_address_bug9749.val,
> align 4
> + %tmp4 = bitcast float addrspace(4)* %tmp2 to i8 addrspace(4)*
> + %call.i = call i32 @_Z9get_fencePU3AS4v(i8 addrspace(4)* %tmp4) #1
> + %switch.i.i = icmp ult i32 %call.i, 4
> + br i1 %switch.i.i, label %if.end.i, label %helperFunction.exit
> +
> +if.end.i: ; preds = %entry
> + %tmp5 = load float, float addrspace(4)* %tmp2, align 4
> + %not.cmp.i = fcmp oeq float %tmp5, %tmp3
> + %phitmp = zext i1 %not.cmp.i to i32
> + br label %helperFunction.exit
> +
> +helperFunction.exit: ; preds = %if.end.i,
> %entry
> + %retval.0.i = phi i32 [ 0, %entry ], [ %phitmp, %if.end.i ]
> + %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %results, i64
> %tmp1
> + store i32 %retval.0.i, i32 addrspace(1)* %arrayidx, align 4
> + ret void
> +}
> +
> +; CHECK-LABEL: @generic_address_opt_phi_bug9776_simple_phi_kernel(
> +; CHECK: phi i32 addrspace(3)*
> +; CHECK: store i32 %i.03, i32 addrspace(3)* %
> +define void @generic_address_opt_phi_bug9776_simple_phi_kernel(i32
> addrspace(3)* nocapture %in, i32 %numElems) #0 {
> +entry:
> + %cmp1 = icmp eq i32 %numElems, 0
> + br i1 %cmp1, label %for.end, label %for.body.lr.ph
> +
> +for.body.lr.ph: ; preds = %entry
> + %tmp = addrspacecast i32 addrspace(3)* %in to i32 addrspace(4)*
> + br label %for.body
> +
> +for.body: ; preds = %for.body, %
> for.body.lr.ph
> + %i.03 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
> + %ptr.02 = phi i32 addrspace(4)* [ %tmp, %for.body.lr.ph ], [ %add.ptr,
> %for.body ]
> + store i32 %i.03, i32 addrspace(4)* %ptr.02, align 4
> + %add.ptr = getelementptr inbounds i32, i32 addrspace(4)* %ptr.02, i64 4
> + %inc = add nuw i32 %i.03, 1
> + %exitcond = icmp eq i32 %inc, %numElems
> + br i1 %exitcond, label %for.end, label %for.body
> +
> +for.end: ; preds = %for.body,
> %entry
> + ret void
> +}
> +
> +; CHECK-LABEL: @generic_address_bug9899(
> +; CHECK: %vecload = load <2 x i32>, <2 x i32> addrspace(3)*
> +; CHECK: store <2 x i32> %tmp16, <2 x i32> addrspace(3)*
> +define void @generic_address_bug9899(i64 %arg0, i32 addrspace(3)*
> nocapture %sourceA, i32 addrspace(3)* nocapture %destValues) #0 {
> +entry:
> + %tmp1 = call i32 @llvm.amdgcn.workitem.id.x()
> + %tmp2 = zext i32 %tmp1 to i64
> + %tmp3 = add i64 %tmp2, %arg0
> + %sext = shl i64 %tmp3, 32
> + %tmp4 = addrspacecast i32 addrspace(3)* %destValues to i32 addrspace(4)*
> + %tmp5 = addrspacecast i32 addrspace(3)* %sourceA to i32 addrspace(4)*
> + %tmp6 = ashr exact i64 %sext, 31
> + %tmp7 = getelementptr inbounds i32, i32 addrspace(4)* %tmp5, i64 %tmp6
> + %arrayidx_v4 = bitcast i32 addrspace(4)* %tmp7 to <2 x i32>
> addrspace(4)*
> + %vecload = load <2 x i32>, <2 x i32> addrspace(4)* %arrayidx_v4, align 4
> + %tmp8 = extractelement <2 x i32> %vecload, i32 0
> + %tmp9 = extractelement <2 x i32> %vecload, i32 1
> + %tmp10 = icmp eq i32 %tmp8, 0
> + %tmp11 = select i1 %tmp10, i32 32, i32 %tmp8
> + %tmp12 = icmp eq i32 %tmp9, 0
> + %tmp13 = select i1 %tmp12, i32 32, i32 %tmp9
> + %tmp14 = getelementptr inbounds i32, i32 addrspace(4)* %tmp4, i64 %tmp6
> + %tmp15 = insertelement <2 x i32> undef, i32 %tmp11, i32 0
> + %tmp16 = insertelement <2 x i32> %tmp15, i32 %tmp13, i32 1
> + %arrayidx_v41 = bitcast i32 addrspace(4)* %tmp14 to <2 x i32>
> addrspace(4)*
> + store <2 x i32> %tmp16, <2 x i32> addrspace(4)* %arrayidx_v41, align 4
> + ret void
> +}
> +
> +declare i32 @llvm.amdgcn.workitem.id.x() #2
> +
> +attributes #0 = { nounwind }
> +attributes #1 = { nounwind readonly }
> +attributes #2 = { nounwind readnone }
> \ No newline at end of file
>
> Added: llvm/trunk/test/Transforms/InferAddressSpaces/lit.local.cfg
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/
> Transforms/InferAddressSpaces/lit.local.cfg?rev=293580&view=auto
> ============================================================
> ==================
> --- llvm/trunk/test/Transforms/InferAddressSpaces/lit.local.cfg (added)
> +++ llvm/trunk/test/Transforms/InferAddressSpaces/lit.local.cfg Mon Jan
> 30 19:20:54 2017
> @@ -0,0 +1,3 @@
> +if not 'AMDGPU' in config.root.targets:
> + config.unsupported = True
> +
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20170131/b97351ab/attachment.html>
More information about the llvm-commits
mailing list